# Mount Google Drive so the V-COCO project files are reachable from this Colab VM.
from google.colab import drive
drive.mount('/content/drive')
# Root directory of the V-COCO project on the mounted drive.
data_dir = "/content/drive/MyDrive/MIT/3. Spring 2022/6.869/6.869 Project/v-coco"
# %cd /content/drive/MyDrive/MIT/3.\ Spring\ 2022/6.869/6.869\ Project/v-coco/
Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).
# Change the working directory to the shared (shortcut) copy of the project folder.
%cd /content/drive/.shortcut-targets-by-id/1Eg2dTJ9lloHImzZiTfOZ7_jOP1sTh--c/6.869 Project/v-coco/
/content/drive/.shortcut-targets-by-id/1Eg2dTJ9lloHImzZiTfOZ7_jOP1sTh--c/6.869 Project/v-coco
# Upgrade PyTorch/torchvision and install fvcore (a detectron2 build dependency).
!pip install -U torch torchvision
!pip install git+https://github.com/facebookresearch/fvcore.git
import torch, torchvision
# Show the installed torch version (expected to be a CUDA build, e.g. 1.11.0+cu113).
torch.__version__
Requirement already satisfied: torch in /usr/local/lib/python3.7/dist-packages (1.11.0+cu113)
Requirement already satisfied: torchvision in /usr/local/lib/python3.7/dist-packages (0.12.0+cu113)
Requirement already satisfied: typing-extensions in /usr/local/lib/python3.7/dist-packages (from torch) (4.2.0)
Requirement already satisfied: pillow!=8.3.*,>=5.3.0 in /usr/local/lib/python3.7/dist-packages (from torchvision) (7.1.2)
Requirement already satisfied: numpy in /usr/local/lib/python3.7/dist-packages (from torchvision) (1.21.6)
Requirement already satisfied: requests in /usr/local/lib/python3.7/dist-packages (from torchvision) (2.23.0)
Requirement already satisfied: chardet<4,>=3.0.2 in /usr/local/lib/python3.7/dist-packages (from requests->torchvision) (3.0.4)
Requirement already satisfied: certifi>=2017.4.17 in /usr/local/lib/python3.7/dist-packages (from requests->torchvision) (2021.10.8)
Requirement already satisfied: urllib3!=1.25.0,!=1.25.1,<1.26,>=1.21.1 in /usr/local/lib/python3.7/dist-packages (from requests->torchvision) (1.24.3)
Requirement already satisfied: idna<3,>=2.5 in /usr/local/lib/python3.7/dist-packages (from requests->torchvision) (2.10)
Collecting git+https://github.com/facebookresearch/fvcore.git
Cloning https://github.com/facebookresearch/fvcore.git to /tmp/pip-req-build-1zxl_lz6
Running command git clone -q https://github.com/facebookresearch/fvcore.git /tmp/pip-req-build-1zxl_lz6
Requirement already satisfied: numpy in /usr/local/lib/python3.7/dist-packages (from fvcore==0.1.5) (1.21.6)
Collecting yacs>=0.1.6
Downloading yacs-0.1.8-py3-none-any.whl (14 kB)
Collecting pyyaml>=5.1
Downloading PyYAML-6.0-cp37-cp37m-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_12_x86_64.manylinux2010_x86_64.whl (596 kB)
|████████████████████████████████| 596 kB 4.3 MB/s
Requirement already satisfied: tqdm in /usr/local/lib/python3.7/dist-packages (from fvcore==0.1.5) (4.64.0)
Requirement already satisfied: termcolor>=1.1 in /usr/local/lib/python3.7/dist-packages (from fvcore==0.1.5) (1.1.0)
Requirement already satisfied: Pillow in /usr/local/lib/python3.7/dist-packages (from fvcore==0.1.5) (7.1.2)
Requirement already satisfied: tabulate in /usr/local/lib/python3.7/dist-packages (from fvcore==0.1.5) (0.8.9)
Collecting iopath>=0.1.7
Downloading iopath-0.1.9-py3-none-any.whl (27 kB)
Collecting portalocker
Downloading portalocker-2.4.0-py2.py3-none-any.whl (16 kB)
Building wheels for collected packages: fvcore
Building wheel for fvcore (setup.py) ... done
Created wheel for fvcore: filename=fvcore-0.1.5-py3-none-any.whl size=65172 sha256=4dd2c48a44b3f6088e98d5a602739f54f755f9e7149f476bda211083063457d4
Stored in directory: /tmp/pip-ephem-wheel-cache-wz77symq/wheels/24/1d/09/8167de727fe5b74f832b6fcb5d9069d8f03ca29f337bfe484d
Successfully built fvcore
Installing collected packages: pyyaml, portalocker, yacs, iopath, fvcore
Attempting uninstall: pyyaml
Found existing installation: PyYAML 3.13
Uninstalling PyYAML-3.13:
Successfully uninstalled PyYAML-3.13
Successfully installed fvcore-0.1.5 iopath-0.1.9 portalocker-2.4.0 pyyaml-6.0 yacs-0.1.8
'1.11.0+cu113'
# Clone detectron2 and install it in editable mode. The clone fails harmlessly
# ("destination path already exists") when the repo survives from a prior session.
!git clone https://github.com/facebookresearch/detectron2 detectron2_repo
!pip install -e detectron2_repo
fatal: destination path 'detectron2_repo' already exists and is not an empty directory.
Obtaining file:///content/drive/.shortcut-targets-by-id/1Eg2dTJ9lloHImzZiTfOZ7_jOP1sTh--c/6.869%20Project/v-coco/detectron2_repo
Requirement already satisfied: Pillow>=7.1 in /usr/local/lib/python3.7/dist-packages (from detectron2==0.6) (7.1.2)
Requirement already satisfied: matplotlib in /usr/local/lib/python3.7/dist-packages (from detectron2==0.6) (3.2.2)
Requirement already satisfied: pycocotools>=2.0.2 in /usr/local/lib/python3.7/dist-packages (from detectron2==0.6) (2.0.4)
Requirement already satisfied: termcolor>=1.1 in /usr/local/lib/python3.7/dist-packages (from detectron2==0.6) (1.1.0)
Requirement already satisfied: yacs>=0.1.8 in /usr/local/lib/python3.7/dist-packages (from detectron2==0.6) (0.1.8)
Requirement already satisfied: tabulate in /usr/local/lib/python3.7/dist-packages (from detectron2==0.6) (0.8.9)
Requirement already satisfied: cloudpickle in /usr/local/lib/python3.7/dist-packages (from detectron2==0.6) (1.3.0)
Requirement already satisfied: tqdm>4.29.0 in /usr/local/lib/python3.7/dist-packages (from detectron2==0.6) (4.64.0)
Requirement already satisfied: tensorboard in /usr/local/lib/python3.7/dist-packages (from detectron2==0.6) (2.8.0)
Requirement already satisfied: fvcore<0.1.6,>=0.1.5 in /usr/local/lib/python3.7/dist-packages (from detectron2==0.6) (0.1.5)
Requirement already satisfied: iopath<0.1.10,>=0.1.7 in /usr/local/lib/python3.7/dist-packages (from detectron2==0.6) (0.1.9)
Requirement already satisfied: future in /usr/local/lib/python3.7/dist-packages (from detectron2==0.6) (0.16.0)
Requirement already satisfied: pydot in /usr/local/lib/python3.7/dist-packages (from detectron2==0.6) (1.3.0)
Collecting omegaconf>=2.1
Downloading omegaconf-2.1.2-py3-none-any.whl (74 kB)
|████████████████████████████████| 74 kB 2.0 MB/s
Collecting hydra-core>=1.1
Downloading hydra_core-1.1.2-py3-none-any.whl (147 kB)
|████████████████████████████████| 147 kB 26.9 MB/s
Collecting black==21.4b2
Downloading black-21.4b2-py3-none-any.whl (130 kB)
|████████████████████████████████| 130 kB 47.1 MB/s
Collecting scipy>1.5.1
Downloading scipy-1.7.3-cp37-cp37m-manylinux_2_12_x86_64.manylinux2010_x86_64.whl (38.1 MB)
|████████████████████████████████| 38.1 MB 1.3 MB/s
Collecting toml>=0.10.1
Downloading toml-0.10.2-py2.py3-none-any.whl (16 kB)
Collecting mypy-extensions>=0.4.3
Downloading mypy_extensions-0.4.3-py2.py3-none-any.whl (4.5 kB)
Collecting pathspec<1,>=0.8.1
Downloading pathspec-0.9.0-py2.py3-none-any.whl (31 kB)
Collecting regex>=2020.1.8
Downloading regex-2022.4.24-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (749 kB)
|████████████████████████████████| 749 kB 47.5 MB/s
Requirement already satisfied: typing-extensions>=3.7.4 in /usr/local/lib/python3.7/dist-packages (from black==21.4b2->detectron2==0.6) (4.2.0)
Requirement already satisfied: click>=7.1.2 in /usr/local/lib/python3.7/dist-packages (from black==21.4b2->detectron2==0.6) (7.1.2)
Requirement already satisfied: appdirs in /usr/local/lib/python3.7/dist-packages (from black==21.4b2->detectron2==0.6) (1.4.4)
Collecting typed-ast>=1.4.2
Downloading typed_ast-1.5.3-cp37-cp37m-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_12_x86_64.manylinux2010_x86_64.whl (843 kB)
|████████████████████████████████| 843 kB 53.3 MB/s
Requirement already satisfied: pyyaml>=5.1 in /usr/local/lib/python3.7/dist-packages (from fvcore<0.1.6,>=0.1.5->detectron2==0.6) (6.0)
Requirement already satisfied: numpy in /usr/local/lib/python3.7/dist-packages (from fvcore<0.1.6,>=0.1.5->detectron2==0.6) (1.21.6)
Collecting importlib-resources<5.3
Downloading importlib_resources-5.2.3-py3-none-any.whl (27 kB)
Collecting antlr4-python3-runtime==4.8
Downloading antlr4-python3-runtime-4.8.tar.gz (112 kB)
|████████████████████████████████| 112 kB 60.5 MB/s
Requirement already satisfied: zipp>=3.1.0 in /usr/local/lib/python3.7/dist-packages (from importlib-resources<5.3->hydra-core>=1.1->detectron2==0.6) (3.8.0)
Requirement already satisfied: portalocker in /usr/local/lib/python3.7/dist-packages (from iopath<0.1.10,>=0.1.7->detectron2==0.6) (2.4.0)
Requirement already satisfied: pyparsing!=2.0.4,!=2.1.2,!=2.1.6,>=2.0.1 in /usr/local/lib/python3.7/dist-packages (from matplotlib->detectron2==0.6) (3.0.8)
Requirement already satisfied: cycler>=0.10 in /usr/local/lib/python3.7/dist-packages (from matplotlib->detectron2==0.6) (0.11.0)
Requirement already satisfied: kiwisolver>=1.0.1 in /usr/local/lib/python3.7/dist-packages (from matplotlib->detectron2==0.6) (1.4.2)
Requirement already satisfied: python-dateutil>=2.1 in /usr/local/lib/python3.7/dist-packages (from matplotlib->detectron2==0.6) (2.8.2)
Requirement already satisfied: six>=1.5 in /usr/local/lib/python3.7/dist-packages (from python-dateutil>=2.1->matplotlib->detectron2==0.6) (1.15.0)
Requirement already satisfied: grpcio>=1.24.3 in /usr/local/lib/python3.7/dist-packages (from tensorboard->detectron2==0.6) (1.44.0)
Requirement already satisfied: google-auth-oauthlib<0.5,>=0.4.1 in /usr/local/lib/python3.7/dist-packages (from tensorboard->detectron2==0.6) (0.4.6)
Requirement already satisfied: absl-py>=0.4 in /usr/local/lib/python3.7/dist-packages (from tensorboard->detectron2==0.6) (1.0.0)
Requirement already satisfied: protobuf>=3.6.0 in /usr/local/lib/python3.7/dist-packages (from tensorboard->detectron2==0.6) (3.17.3)
Requirement already satisfied: setuptools>=41.0.0 in /usr/local/lib/python3.7/dist-packages (from tensorboard->detectron2==0.6) (57.4.0)
Requirement already satisfied: tensorboard-plugin-wit>=1.6.0 in /usr/local/lib/python3.7/dist-packages (from tensorboard->detectron2==0.6) (1.8.1)
Requirement already satisfied: wheel>=0.26 in /usr/local/lib/python3.7/dist-packages (from tensorboard->detectron2==0.6) (0.37.1)
Requirement already satisfied: requests<3,>=2.21.0 in /usr/local/lib/python3.7/dist-packages (from tensorboard->detectron2==0.6) (2.23.0)
Requirement already satisfied: markdown>=2.6.8 in /usr/local/lib/python3.7/dist-packages (from tensorboard->detectron2==0.6) (3.3.6)
Requirement already satisfied: tensorboard-data-server<0.7.0,>=0.6.0 in /usr/local/lib/python3.7/dist-packages (from tensorboard->detectron2==0.6) (0.6.1)
Requirement already satisfied: werkzeug>=0.11.15 in /usr/local/lib/python3.7/dist-packages (from tensorboard->detectron2==0.6) (1.0.1)
Requirement already satisfied: google-auth<3,>=1.6.3 in /usr/local/lib/python3.7/dist-packages (from tensorboard->detectron2==0.6) (1.35.0)
Requirement already satisfied: cachetools<5.0,>=2.0.0 in /usr/local/lib/python3.7/dist-packages (from google-auth<3,>=1.6.3->tensorboard->detectron2==0.6) (4.2.4)
Requirement already satisfied: pyasn1-modules>=0.2.1 in /usr/local/lib/python3.7/dist-packages (from google-auth<3,>=1.6.3->tensorboard->detectron2==0.6) (0.2.8)
Requirement already satisfied: rsa<5,>=3.1.4 in /usr/local/lib/python3.7/dist-packages (from google-auth<3,>=1.6.3->tensorboard->detectron2==0.6) (4.8)
Requirement already satisfied: requests-oauthlib>=0.7.0 in /usr/local/lib/python3.7/dist-packages (from google-auth-oauthlib<0.5,>=0.4.1->tensorboard->detectron2==0.6) (1.3.1)
Requirement already satisfied: importlib-metadata>=4.4 in /usr/local/lib/python3.7/dist-packages (from markdown>=2.6.8->tensorboard->detectron2==0.6) (4.11.3)
Requirement already satisfied: pyasn1<0.5.0,>=0.4.6 in /usr/local/lib/python3.7/dist-packages (from pyasn1-modules>=0.2.1->google-auth<3,>=1.6.3->tensorboard->detectron2==0.6) (0.4.8)
Requirement already satisfied: urllib3!=1.25.0,!=1.25.1,<1.26,>=1.21.1 in /usr/local/lib/python3.7/dist-packages (from requests<3,>=2.21.0->tensorboard->detectron2==0.6) (1.24.3)
Requirement already satisfied: certifi>=2017.4.17 in /usr/local/lib/python3.7/dist-packages (from requests<3,>=2.21.0->tensorboard->detectron2==0.6) (2021.10.8)
Requirement already satisfied: idna<3,>=2.5 in /usr/local/lib/python3.7/dist-packages (from requests<3,>=2.21.0->tensorboard->detectron2==0.6) (2.10)
Requirement already satisfied: chardet<4,>=3.0.2 in /usr/local/lib/python3.7/dist-packages (from requests<3,>=2.21.0->tensorboard->detectron2==0.6) (3.0.4)
Requirement already satisfied: oauthlib>=3.0.0 in /usr/local/lib/python3.7/dist-packages (from requests-oauthlib>=0.7.0->google-auth-oauthlib<0.5,>=0.4.1->tensorboard->detectron2==0.6) (3.2.0)
Building wheels for collected packages: antlr4-python3-runtime
Building wheel for antlr4-python3-runtime (setup.py) ... done
Created wheel for antlr4-python3-runtime: filename=antlr4_python3_runtime-4.8-py3-none-any.whl size=141230 sha256=81281dda8dbbdbb02c852c1b208a2d3197969afa54ef1e649e9a62f044948d33
Stored in directory: /root/.cache/pip/wheels/ca/33/b7/336836125fc9bb4ceaa4376d8abca10ca8bc84ddc824baea6c
Successfully built antlr4-python3-runtime
Installing collected packages: antlr4-python3-runtime, typed-ast, toml, regex, pathspec, omegaconf, mypy-extensions, importlib-resources, scipy, hydra-core, black, detectron2
Attempting uninstall: regex
Found existing installation: regex 2019.12.20
Uninstalling regex-2019.12.20:
Successfully uninstalled regex-2019.12.20
Attempting uninstall: importlib-resources
Found existing installation: importlib-resources 5.7.1
Uninstalling importlib-resources-5.7.1:
Successfully uninstalled importlib-resources-5.7.1
Attempting uninstall: scipy
Found existing installation: scipy 1.4.1
Uninstalling scipy-1.4.1:
Successfully uninstalled scipy-1.4.1
Running setup.py develop for detectron2
ERROR: pip's dependency resolver does not currently take into account all the packages that are installed. This behaviour is the source of the following dependency conflicts.
albumentations 0.1.12 requires imgaug<0.2.7,>=0.2.5, but you have imgaug 0.2.9 which is incompatible.
Successfully installed antlr4-python3-runtime-4.8 black-21.4b2 detectron2-0.6 hydra-core-1.1.2 importlib-resources-5.2.3 mypy-extensions-0.4.3 omegaconf-2.1.2 pathspec-0.9.0 regex-2022.4.24 scipy-1.7.3 toml-0.10.2 typed-ast-1.5.3
# Some basic setup
# V-COCO helper modules (project-local): annotation loading and evaluation utilities.
import vsrl_utils as vu
import vsrl_eval as ve
import json
# Setup detectron2 logger
import detectron2
from detectron2.utils.logger import setup_logger
setup_logger()
# import some common libraries
import matplotlib.pyplot as plt
import numpy as np
import cv2
from google.colab.patches import cv2_imshow  # cv2.imshow replacement that works in Colab
# import some common detectron2 utilities
from detectron2.engine import DefaultPredictor
from detectron2.config import get_cfg
from detectron2.utils.visualizer import Visualizer
from detectron2.data import MetadataCatalog, DatasetCatalog
Register the V-COCO dataset to detectron2.
# Load COCO annotations for V-COCO images
coco = vu.load_coco()

# Load the V-COCO action annotations for the vcoco_val image set.
vcoco_all = vu.load_vcoco('vcoco_val')

# Attach ground-truth boxes to every action entry. The original loop did
# `x = vu.attach_gt_boxes(x, coco)`, which rebinds the loop variable and
# silently discards the return value; keeping the returned objects in the
# list is correct whether or not attach_gt_boxes mutates in place.
vcoco_all = [vu.attach_gt_boxes(x, coco) for x in vcoco_all]

# Counter used to generate unique dataset registration names across re-runs.
increment = 0
# Regenerate the COCO annotation subsets needed by V-COCO (validation-only and
# train-only splits), writing the results into ./data.
!python script_pick_annotations_val_only.py coco/annotations
!python script_pick_annotations_train_only.py coco/annotations
coco/annotations, data Loading training annotations from coco/annotations/instances_train2014.json Loading validating annotations from coco/annotations/instances_val2014.json Writing COCO annotations needed for V-COCO to data/instances_vcoco_val_2014_t.json. coco/annotations, data Loading training annotations from coco/annotations/instances_train2014.json Loading validating annotations from coco/annotations/instances_val2014.json Writing COCO annotations needed for V-COCO to data/instances_vcoco_train_2014.json.
# Actions kept for the detector; the commented list is the wider candidate set.
relevant_classes = ["hold", "carry", "point", "eat", "drink", "stand", "talk_on_phone"]  # ["hold", "look", "carry", "point", "hit", "eat", "drink", "stand", "talk_on_phone"]
classes = [x['action_name'] for x in vcoco_all]

# Merged COCO-format dataset: one category per action. Object annotations are
# re-labelled so the detector predicts the *action* rather than the object class.
final_json = {"images": [], "annotations": [], "categories": []}
all_ids = []  # new annotation ids, kept around for duplicate-id debugging

for category, action in enumerate(relevant_classes):
    vcoco = vcoco_all[classes.index(action)]

    # Indices of positive (label == 1) examples for this action.
    positive_index = np.where(vcoco['label'] == 1)[0]
    image_ids = vcoco['image_id'][positive_index][:, 0]
    anno_ids = vcoco['ann_id'][positive_index][:, 0]

    final_data = {}
    final_data["images"] = coco.loadImgs(ids=image_ids)

    # Re-id each annotation by prefixing the 1-based category index, so the
    # same COCO annotation reused by several actions gets a unique id per action.
    new_annos = []
    for a_anno in coco.loadAnns(ids=anno_ids):
        new_anno = {**a_anno}
        new_anno["category_id"] = category + 1
        new_anno["id"] = int(str((category + 1) * 10000000000) + str(a_anno["id"]))
        new_annos.append(new_anno)
        all_ids.append(new_anno["id"])
    final_data["annotations"] = new_annos
    final_data["categories"] = [{"supercategory": action, "id": category + 1, "name": action}]

    # Append this action's images/annotations/categories to the merged dataset.
    # (extend avoids rebuilding the accumulated lists on every iteration.)
    for key in final_json.keys():
        final_json[key].extend(final_data[key])

increment = increment + 1
with open("./data/instances_vcoco_actions_train.json", 'w') as fp:
    json.dump(final_json, fp)

from detectron2.data.datasets import register_coco_instances
# NOTE(review): the annotations above were built from 'vcoco_val' yet are
# written and registered as a *train* set rooted at ./coco/train2014 --
# presumably the loading cell is re-run per split; confirm before relying on it.
register_coco_instances("rel_actions" + str(increment), {}, "./data/instances_vcoco_actions_train.json", "./coco/train2014")
print("rel_actions" + str(increment))
rel_actions1
# Fetch the registered training set's metadata and load its parsed dataset dicts.
train_metadata = MetadataCatalog.get("rel_actions"+str(increment))
train_dicts = DatasetCatalog.get("rel_actions"+str(increment))
[05/08 23:31:46 d2.data.datasets.coco]: Loaded 1951 images in COCO format from ./data/instances_vcoco_actions_train.json
# NOTE(review): this writes the *same* final_json built above to the "val"
# file and registers it against ./coco/train2014 -- presumably the cells above
# are re-run with the validation split before executing this one; confirm.
increment = increment + 1
with open("./data/instances_vcoco_actions_val_2014.json", 'w') as fp:
    json.dump(final_json, fp)
from detectron2.data.datasets import register_coco_instances
register_coco_instances("rel_actions_val"+str(increment), {}, "./data/instances_vcoco_actions_val_2014.json", "./coco/train2014")
print("rel_actions_val"+str(increment))
# Metadata and dataset dicts for the registered validation set.
val_metadata = MetadataCatalog.get("rel_actions_val"+str(increment))
val_dicts = DatasetCatalog.get("rel_actions_val"+str(increment))
rel_actions_val3
[05/09 00:13:52 d2.data.datasets.coco]: Loaded 2215 images in COCO format from ./data/instances_vcoco_actions_val_2014.json
To verify the data loading is correct, let's visualize the annotations of randomly selected samples in the training set:
import random

# Sanity-check the dataset registration: draw the ground-truth annotations on
# three randomly chosen training samples and display them inline.
for sample in random.sample(train_dicts, 3):
    image = cv2.imread(sample["file_name"])
    viz = Visualizer(image[:, :, ::-1], metadata=train_metadata, scale=0.5)
    rendered = viz.draw_dataset_dict(sample)
    cv2_imshow(rendered.get_image()[:, :, ::-1])
Now, let's fine-tune a COCO-pretrained R50-FPN Mask R-CNN model on the V-COCO actions dataset registered above. Training the configured 1,000 iterations takes several minutes on Colab's K80 GPU.
from detectron2.engine import DefaultTrainer
from detectron2.config import get_cfg
import os

# Fine-tune a COCO-pretrained Mask R-CNN (R50-FPN, 3x schedule) on the
# registered action-detection dataset.
cfg = get_cfg()
cfg.merge_from_file("./detectron2_repo/configs/COCO-InstanceSegmentation/mask_rcnn_R_50_FPN_3x.yaml")
# NOTE(review): hardcodes "rel_actions1" rather than "rel_actions"+str(increment)
# used at registration time -- only matches when increment was 1; confirm.
cfg.DATASETS.TRAIN = ("rel_actions1",)
cfg.DATASETS.TEST = ()   # no metrics implemented for this dataset
cfg.DATALOADER.NUM_WORKERS = 2
cfg.MODEL.WEIGHTS = "detectron2://COCO-InstanceSegmentation/mask_rcnn_R_50_FPN_3x/137849600/model_final_f10217.pkl"  # initialize from model zoo
cfg.SOLVER.IMS_PER_BATCH = 2
cfg.SOLVER.BASE_LR = 0.002
cfg.SOLVER.MAX_ITER = 1000
cfg.MODEL.ROI_HEADS.BATCH_SIZE_PER_IMAGE = 128  # faster, and good enough for this toy dataset
cfg.MODEL.ROI_HEADS.NUM_CLASSES = len(relevant_classes)  # all relevant classes
cfg.OUTPUT_DIR = "./outputs/action_only_detector"
os.makedirs(cfg.OUTPUT_DIR, exist_ok=True)

# Train from the model-zoo weights; resume=False ignores any prior checkpoint.
trainer = DefaultTrainer(cfg)
trainer.resume_or_load(resume=False)
trainer.train()
[05/08 23:44:11 d2.engine.defaults]: Model: GeneralizedRCNN( (backbone): FPN( (fpn_lateral2): Conv2d(256, 256, kernel_size=(1, 1), stride=(1, 1)) (fpn_output2): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)) (fpn_lateral3): Conv2d(512, 256, kernel_size=(1, 1), stride=(1, 1)) (fpn_output3): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)) (fpn_lateral4): Conv2d(1024, 256, kernel_size=(1, 1), stride=(1, 1)) (fpn_output4): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)) (fpn_lateral5): Conv2d(2048, 256, kernel_size=(1, 1), stride=(1, 1)) (fpn_output5): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)) (top_block): LastLevelMaxPool() (bottom_up): ResNet( (stem): BasicStem( (conv1): Conv2d( 3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False (norm): FrozenBatchNorm2d(num_features=64, eps=1e-05) ) ) (res2): Sequential( (0): BottleneckBlock( (shortcut): Conv2d( 64, 256, kernel_size=(1, 1), stride=(1, 1), bias=False (norm): FrozenBatchNorm2d(num_features=256, eps=1e-05) ) (conv1): Conv2d( 64, 64, kernel_size=(1, 1), stride=(1, 1), bias=False (norm): FrozenBatchNorm2d(num_features=64, eps=1e-05) ) (conv2): Conv2d( 64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False (norm): FrozenBatchNorm2d(num_features=64, eps=1e-05) ) (conv3): Conv2d( 64, 256, kernel_size=(1, 1), stride=(1, 1), bias=False (norm): FrozenBatchNorm2d(num_features=256, eps=1e-05) ) ) (1): BottleneckBlock( (conv1): Conv2d( 256, 64, kernel_size=(1, 1), stride=(1, 1), bias=False (norm): FrozenBatchNorm2d(num_features=64, eps=1e-05) ) (conv2): Conv2d( 64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False (norm): FrozenBatchNorm2d(num_features=64, eps=1e-05) ) (conv3): Conv2d( 64, 256, kernel_size=(1, 1), stride=(1, 1), bias=False (norm): FrozenBatchNorm2d(num_features=256, eps=1e-05) ) ) (2): BottleneckBlock( (conv1): Conv2d( 256, 64, kernel_size=(1, 1), stride=(1, 1), bias=False 
(norm): FrozenBatchNorm2d(num_features=64, eps=1e-05) ) (conv2): Conv2d( 64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False (norm): FrozenBatchNorm2d(num_features=64, eps=1e-05) ) (conv3): Conv2d( 64, 256, kernel_size=(1, 1), stride=(1, 1), bias=False (norm): FrozenBatchNorm2d(num_features=256, eps=1e-05) ) ) ) (res3): Sequential( (0): BottleneckBlock( (shortcut): Conv2d( 256, 512, kernel_size=(1, 1), stride=(2, 2), bias=False (norm): FrozenBatchNorm2d(num_features=512, eps=1e-05) ) (conv1): Conv2d( 256, 128, kernel_size=(1, 1), stride=(2, 2), bias=False (norm): FrozenBatchNorm2d(num_features=128, eps=1e-05) ) (conv2): Conv2d( 128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False (norm): FrozenBatchNorm2d(num_features=128, eps=1e-05) ) (conv3): Conv2d( 128, 512, kernel_size=(1, 1), stride=(1, 1), bias=False (norm): FrozenBatchNorm2d(num_features=512, eps=1e-05) ) ) (1): BottleneckBlock( (conv1): Conv2d( 512, 128, kernel_size=(1, 1), stride=(1, 1), bias=False (norm): FrozenBatchNorm2d(num_features=128, eps=1e-05) ) (conv2): Conv2d( 128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False (norm): FrozenBatchNorm2d(num_features=128, eps=1e-05) ) (conv3): Conv2d( 128, 512, kernel_size=(1, 1), stride=(1, 1), bias=False (norm): FrozenBatchNorm2d(num_features=512, eps=1e-05) ) ) (2): BottleneckBlock( (conv1): Conv2d( 512, 128, kernel_size=(1, 1), stride=(1, 1), bias=False (norm): FrozenBatchNorm2d(num_features=128, eps=1e-05) ) (conv2): Conv2d( 128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False (norm): FrozenBatchNorm2d(num_features=128, eps=1e-05) ) (conv3): Conv2d( 128, 512, kernel_size=(1, 1), stride=(1, 1), bias=False (norm): FrozenBatchNorm2d(num_features=512, eps=1e-05) ) ) (3): BottleneckBlock( (conv1): Conv2d( 512, 128, kernel_size=(1, 1), stride=(1, 1), bias=False (norm): FrozenBatchNorm2d(num_features=128, eps=1e-05) ) (conv2): Conv2d( 128, 128, kernel_size=(3, 3), stride=(1, 1), 
padding=(1, 1), bias=False (norm): FrozenBatchNorm2d(num_features=128, eps=1e-05) ) (conv3): Conv2d( 128, 512, kernel_size=(1, 1), stride=(1, 1), bias=False (norm): FrozenBatchNorm2d(num_features=512, eps=1e-05) ) ) ) (res4): Sequential( (0): BottleneckBlock( (shortcut): Conv2d( 512, 1024, kernel_size=(1, 1), stride=(2, 2), bias=False (norm): FrozenBatchNorm2d(num_features=1024, eps=1e-05) ) (conv1): Conv2d( 512, 256, kernel_size=(1, 1), stride=(2, 2), bias=False (norm): FrozenBatchNorm2d(num_features=256, eps=1e-05) ) (conv2): Conv2d( 256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False (norm): FrozenBatchNorm2d(num_features=256, eps=1e-05) ) (conv3): Conv2d( 256, 1024, kernel_size=(1, 1), stride=(1, 1), bias=False (norm): FrozenBatchNorm2d(num_features=1024, eps=1e-05) ) ) (1): BottleneckBlock( (conv1): Conv2d( 1024, 256, kernel_size=(1, 1), stride=(1, 1), bias=False (norm): FrozenBatchNorm2d(num_features=256, eps=1e-05) ) (conv2): Conv2d( 256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False (norm): FrozenBatchNorm2d(num_features=256, eps=1e-05) ) (conv3): Conv2d( 256, 1024, kernel_size=(1, 1), stride=(1, 1), bias=False (norm): FrozenBatchNorm2d(num_features=1024, eps=1e-05) ) ) (2): BottleneckBlock( (conv1): Conv2d( 1024, 256, kernel_size=(1, 1), stride=(1, 1), bias=False (norm): FrozenBatchNorm2d(num_features=256, eps=1e-05) ) (conv2): Conv2d( 256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False (norm): FrozenBatchNorm2d(num_features=256, eps=1e-05) ) (conv3): Conv2d( 256, 1024, kernel_size=(1, 1), stride=(1, 1), bias=False (norm): FrozenBatchNorm2d(num_features=1024, eps=1e-05) ) ) (3): BottleneckBlock( (conv1): Conv2d( 1024, 256, kernel_size=(1, 1), stride=(1, 1), bias=False (norm): FrozenBatchNorm2d(num_features=256, eps=1e-05) ) (conv2): Conv2d( 256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False (norm): FrozenBatchNorm2d(num_features=256, eps=1e-05) ) (conv3): Conv2d( 256, 1024, 
kernel_size=(1, 1), stride=(1, 1), bias=False (norm): FrozenBatchNorm2d(num_features=1024, eps=1e-05) ) ) (4): BottleneckBlock( (conv1): Conv2d( 1024, 256, kernel_size=(1, 1), stride=(1, 1), bias=False (norm): FrozenBatchNorm2d(num_features=256, eps=1e-05) ) (conv2): Conv2d( 256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False (norm): FrozenBatchNorm2d(num_features=256, eps=1e-05) ) (conv3): Conv2d( 256, 1024, kernel_size=(1, 1), stride=(1, 1), bias=False (norm): FrozenBatchNorm2d(num_features=1024, eps=1e-05) ) ) (5): BottleneckBlock( (conv1): Conv2d( 1024, 256, kernel_size=(1, 1), stride=(1, 1), bias=False (norm): FrozenBatchNorm2d(num_features=256, eps=1e-05) ) (conv2): Conv2d( 256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False (norm): FrozenBatchNorm2d(num_features=256, eps=1e-05) ) (conv3): Conv2d( 256, 1024, kernel_size=(1, 1), stride=(1, 1), bias=False (norm): FrozenBatchNorm2d(num_features=1024, eps=1e-05) ) ) ) (res5): Sequential( (0): BottleneckBlock( (shortcut): Conv2d( 1024, 2048, kernel_size=(1, 1), stride=(2, 2), bias=False (norm): FrozenBatchNorm2d(num_features=2048, eps=1e-05) ) (conv1): Conv2d( 1024, 512, kernel_size=(1, 1), stride=(2, 2), bias=False (norm): FrozenBatchNorm2d(num_features=512, eps=1e-05) ) (conv2): Conv2d( 512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False (norm): FrozenBatchNorm2d(num_features=512, eps=1e-05) ) (conv3): Conv2d( 512, 2048, kernel_size=(1, 1), stride=(1, 1), bias=False (norm): FrozenBatchNorm2d(num_features=2048, eps=1e-05) ) ) (1): BottleneckBlock( (conv1): Conv2d( 2048, 512, kernel_size=(1, 1), stride=(1, 1), bias=False (norm): FrozenBatchNorm2d(num_features=512, eps=1e-05) ) (conv2): Conv2d( 512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False (norm): FrozenBatchNorm2d(num_features=512, eps=1e-05) ) (conv3): Conv2d( 512, 2048, kernel_size=(1, 1), stride=(1, 1), bias=False (norm): FrozenBatchNorm2d(num_features=2048, eps=1e-05) ) ) 
(2): BottleneckBlock( (conv1): Conv2d( 2048, 512, kernel_size=(1, 1), stride=(1, 1), bias=False (norm): FrozenBatchNorm2d(num_features=512, eps=1e-05) ) (conv2): Conv2d( 512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False (norm): FrozenBatchNorm2d(num_features=512, eps=1e-05) ) (conv3): Conv2d( 512, 2048, kernel_size=(1, 1), stride=(1, 1), bias=False (norm): FrozenBatchNorm2d(num_features=2048, eps=1e-05) ) ) ) ) ) (proposal_generator): RPN( (rpn_head): StandardRPNHead( (conv): Conv2d( 256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1) (activation): ReLU() ) (objectness_logits): Conv2d(256, 3, kernel_size=(1, 1), stride=(1, 1)) (anchor_deltas): Conv2d(256, 12, kernel_size=(1, 1), stride=(1, 1)) ) (anchor_generator): DefaultAnchorGenerator( (cell_anchors): BufferList() ) ) (roi_heads): StandardROIHeads( (box_pooler): ROIPooler( (level_poolers): ModuleList( (0): ROIAlign(output_size=(7, 7), spatial_scale=0.25, sampling_ratio=0, aligned=True) (1): ROIAlign(output_size=(7, 7), spatial_scale=0.125, sampling_ratio=0, aligned=True) (2): ROIAlign(output_size=(7, 7), spatial_scale=0.0625, sampling_ratio=0, aligned=True) (3): ROIAlign(output_size=(7, 7), spatial_scale=0.03125, sampling_ratio=0, aligned=True) ) ) (box_head): FastRCNNConvFCHead( (flatten): Flatten(start_dim=1, end_dim=-1) (fc1): Linear(in_features=12544, out_features=1024, bias=True) (fc_relu1): ReLU() (fc2): Linear(in_features=1024, out_features=1024, bias=True) (fc_relu2): ReLU() ) (box_predictor): FastRCNNOutputLayers( (cls_score): Linear(in_features=1024, out_features=8, bias=True) (bbox_pred): Linear(in_features=1024, out_features=28, bias=True) ) (mask_pooler): ROIPooler( (level_poolers): ModuleList( (0): ROIAlign(output_size=(14, 14), spatial_scale=0.25, sampling_ratio=0, aligned=True) (1): ROIAlign(output_size=(14, 14), spatial_scale=0.125, sampling_ratio=0, aligned=True) (2): ROIAlign(output_size=(14, 14), spatial_scale=0.0625, sampling_ratio=0, aligned=True) (3): 
ROIAlign(output_size=(14, 14), spatial_scale=0.03125, sampling_ratio=0, aligned=True) ) ) (mask_head): MaskRCNNConvUpsampleHead( (mask_fcn1): Conv2d( 256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1) (activation): ReLU() ) (mask_fcn2): Conv2d( 256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1) (activation): ReLU() ) (mask_fcn3): Conv2d( 256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1) (activation): ReLU() ) (mask_fcn4): Conv2d( 256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1) (activation): ReLU() ) (deconv): ConvTranspose2d(256, 256, kernel_size=(2, 2), stride=(2, 2)) (deconv_relu): ReLU() (predictor): Conv2d(256, 7, kernel_size=(1, 1), stride=(1, 1)) ) ) ) [05/08 23:44:11 d2.data.datasets.coco]: Loaded 1951 images in COCO format from ./data/instances_vcoco_actions_train.json [05/08 23:44:11 d2.data.build]: Removed 0 images with no usable annotations. 1951 images left. [05/08 23:44:12 d2.data.build]: Distribution of instances among all 7 categories: | category | #instances | category | #instances | category | #instances | |:-------------:|:-------------|:----------:|:-------------|:----------:|:-------------| | hold | 1838 | carry | 236 | point | 15 | | eat | 301 | drink | 63 | stand | 2150 | | talk_on_phone | 167 | | | | | | total | 4770 | | | | | [05/08 23:44:12 d2.data.dataset_mapper]: [DatasetMapper] Augmentations used in training: [ResizeShortestEdge(short_edge_length=(640, 672, 704, 736, 768, 800), max_size=1333, sample_style='choice'), RandomFlip()] [05/08 23:44:12 d2.data.build]: Using training sampler TrainingSampler [05/08 23:44:12 d2.data.common]: Serializing 1951 elements to byte tensors and concatenating them all ... [05/08 23:44:12 d2.data.common]: Serialized dataset takes 4.90 MiB WARNING [05/08 23:44:12 d2.solver.build]: SOLVER.STEPS contains values larger than SOLVER.MAX_ITER. These values will be ignored.
model_final_f10217.pkl: 178MB [00:12, 14.6MB/s] Skip loading parameter 'roi_heads.box_predictor.cls_score.weight' to the model due to incompatible shapes: (81, 1024) in the checkpoint but (8, 1024) in the model! You might want to double check if this is expected. Skip loading parameter 'roi_heads.box_predictor.cls_score.bias' to the model due to incompatible shapes: (81,) in the checkpoint but (8,) in the model! You might want to double check if this is expected. Skip loading parameter 'roi_heads.box_predictor.bbox_pred.weight' to the model due to incompatible shapes: (320, 1024) in the checkpoint but (28, 1024) in the model! You might want to double check if this is expected. Skip loading parameter 'roi_heads.box_predictor.bbox_pred.bias' to the model due to incompatible shapes: (320,) in the checkpoint but (28,) in the model! You might want to double check if this is expected. Skip loading parameter 'roi_heads.mask_head.predictor.weight' to the model due to incompatible shapes: (80, 256, 1, 1) in the checkpoint but (7, 256, 1, 1) in the model! You might want to double check if this is expected. Skip loading parameter 'roi_heads.mask_head.predictor.bias' to the model due to incompatible shapes: (80,) in the checkpoint but (7,) in the model! You might want to double check if this is expected. Some model parameters or buffers are not found in the checkpoint: roi_heads.box_predictor.bbox_pred.{bias, weight} roi_heads.box_predictor.cls_score.{bias, weight} roi_heads.mask_head.predictor.{bias, weight}
[05/08 23:44:30 d2.engine.train_loop]: Starting training from iteration 0
/usr/local/lib/python3.7/dist-packages/torch/functional.py:568: UserWarning: torch.meshgrid: in an upcoming release, it will be required to pass the indexing argument. (Triggered internally at ../aten/src/ATen/native/TensorShape.cpp:2228.) return _VF.meshgrid(tensors, **kwargs) # type: ignore[attr-defined]
[05/08 23:44:45 d2.utils.events]: eta: 0:11:46 iter: 19 total_loss: 3.509 loss_cls: 2.275 loss_box_reg: 0.5488 loss_mask: 0.6903 loss_rpn_cls: 0.03365 loss_rpn_loc: 0.004783 time: 0.6797 data_time: 0.4443 lr: 3.9962e-05 max_mem: 2545M [05/08 23:44:59 d2.utils.events]: eta: 0:11:25 iter: 39 total_loss: 2.852 loss_cls: 1.574 loss_box_reg: 0.5157 loss_mask: 0.6691 loss_rpn_cls: 0.02743 loss_rpn_loc: 0.004821 time: 0.6894 data_time: 0.4270 lr: 7.9922e-05 max_mem: 2545M [05/08 23:45:14 d2.utils.events]: eta: 0:11:12 iter: 59 total_loss: 1.92 loss_cls: 0.7083 loss_box_reg: 0.5131 loss_mask: 0.6261 loss_rpn_cls: 0.01519 loss_rpn_loc: 0.005434 time: 0.7122 data_time: 0.4793 lr: 0.00011988 max_mem: 2545M [05/08 23:45:28 d2.utils.events]: eta: 0:10:55 iter: 79 total_loss: 1.858 loss_cls: 0.5534 loss_box_reg: 0.6597 loss_mask: 0.5698 loss_rpn_cls: 0.01562 loss_rpn_loc: 0.00593 time: 0.7083 data_time: 0.4169 lr: 0.00015984 max_mem: 2545M [05/08 23:45:43 d2.utils.events]: eta: 0:10:41 iter: 99 total_loss: 1.649 loss_cls: 0.4709 loss_box_reg: 0.629 loss_mask: 0.5024 loss_rpn_cls: 0.01413 loss_rpn_loc: 0.003994 time: 0.7145 data_time: 0.4705 lr: 0.0001998 max_mem: 2758M [05/08 23:45:58 d2.utils.events]: eta: 0:10:26 iter: 119 total_loss: 1.417 loss_cls: 0.3935 loss_box_reg: 0.5969 loss_mask: 0.4463 loss_rpn_cls: 0.01225 loss_rpn_loc: 0.004268 time: 0.7216 data_time: 0.4856 lr: 0.00023976 max_mem: 2758M [05/08 23:46:12 d2.utils.events]: eta: 0:10:11 iter: 139 total_loss: 1.177 loss_cls: 0.3305 loss_box_reg: 0.5064 loss_mask: 0.329 loss_rpn_cls: 0.009698 loss_rpn_loc: 0.005238 time: 0.7206 data_time: 0.4357 lr: 0.00027972 max_mem: 2758M [05/08 23:46:25 d2.utils.events]: eta: 0:09:51 iter: 159 total_loss: 0.9623 loss_cls: 0.2355 loss_box_reg: 0.4394 loss_mask: 0.2282 loss_rpn_cls: 0.007012 loss_rpn_loc: 0.004906 time: 0.7102 data_time: 0.3691 lr: 0.00031968 max_mem: 2758M [05/08 23:46:39 d2.utils.events]: eta: 0:09:34 iter: 179 total_loss: 0.9734 loss_cls: 0.2494 loss_box_reg: 
0.4906 loss_mask: 0.2062 loss_rpn_cls: 0.007104 loss_rpn_loc: 0.00683 time: 0.7070 data_time: 0.4068 lr: 0.00035964 max_mem: 2758M [05/08 23:46:52 d2.utils.events]: eta: 0:09:17 iter: 199 total_loss: 0.7626 loss_cls: 0.2272 loss_box_reg: 0.3877 loss_mask: 0.1662 loss_rpn_cls: 0.004023 loss_rpn_loc: 0.005579 time: 0.7018 data_time: 0.3963 lr: 0.0003996 max_mem: 2758M [05/08 23:47:06 d2.utils.events]: eta: 0:08:59 iter: 219 total_loss: 0.6499 loss_cls: 0.1997 loss_box_reg: 0.2687 loss_mask: 0.1704 loss_rpn_cls: 0.006145 loss_rpn_loc: 0.01117 time: 0.7044 data_time: 0.4561 lr: 0.00043956 max_mem: 2758M [05/08 23:47:20 d2.utils.events]: eta: 0:08:45 iter: 239 total_loss: 0.6414 loss_cls: 0.1949 loss_box_reg: 0.2449 loss_mask: 0.1675 loss_rpn_cls: 0.007746 loss_rpn_loc: 0.006541 time: 0.7012 data_time: 0.3887 lr: 0.00047952 max_mem: 2758M [05/08 23:47:34 d2.utils.events]: eta: 0:08:32 iter: 259 total_loss: 0.6398 loss_cls: 0.1775 loss_box_reg: 0.2551 loss_mask: 0.1514 loss_rpn_cls: 0.003853 loss_rpn_loc: 0.007165 time: 0.7021 data_time: 0.4405 lr: 0.00051948 max_mem: 2758M [05/08 23:47:48 d2.utils.events]: eta: 0:08:17 iter: 279 total_loss: 0.6898 loss_cls: 0.1958 loss_box_reg: 0.2795 loss_mask: 0.1758 loss_rpn_cls: 0.002961 loss_rpn_loc: 0.00508 time: 0.7004 data_time: 0.4070 lr: 0.00055944 max_mem: 2758M [05/08 23:48:01 d2.utils.events]: eta: 0:08:02 iter: 299 total_loss: 0.6019 loss_cls: 0.2215 loss_box_reg: 0.2301 loss_mask: 0.1476 loss_rpn_cls: 0.001727 loss_rpn_loc: 0.006422 time: 0.6976 data_time: 0.3794 lr: 0.0005994 max_mem: 2758M [05/08 23:48:13 d2.utils.events]: eta: 0:07:48 iter: 319 total_loss: 0.5421 loss_cls: 0.1544 loss_box_reg: 0.2133 loss_mask: 0.1664 loss_rpn_cls: 0.004455 loss_rpn_loc: 0.005853 time: 0.6936 data_time: 0.3608 lr: 0.00063936 max_mem: 2758M [05/08 23:48:27 d2.utils.events]: eta: 0:07:32 iter: 339 total_loss: 0.6201 loss_cls: 0.1972 loss_box_reg: 0.2262 loss_mask: 0.1595 loss_rpn_cls: 0.001733 loss_rpn_loc: 0.005763 time: 0.6922 
data_time: 0.3985 lr: 0.00067932 max_mem: 2758M [05/08 23:48:40 d2.utils.events]: eta: 0:07:17 iter: 359 total_loss: 0.5356 loss_cls: 0.1841 loss_box_reg: 0.1714 loss_mask: 0.1613 loss_rpn_cls: 0.001398 loss_rpn_loc: 0.003595 time: 0.6914 data_time: 0.4123 lr: 0.00071928 max_mem: 2758M [05/08 23:48:55 d2.utils.events]: eta: 0:07:03 iter: 379 total_loss: 0.5604 loss_cls: 0.1602 loss_box_reg: 0.2137 loss_mask: 0.1598 loss_rpn_cls: 0.003006 loss_rpn_loc: 0.007538 time: 0.6920 data_time: 0.4284 lr: 0.00075924 max_mem: 2758M [05/08 23:49:08 d2.utils.events]: eta: 0:06:50 iter: 399 total_loss: 0.5451 loss_cls: 0.1769 loss_box_reg: 0.1712 loss_mask: 0.152 loss_rpn_cls: 0.0008314 loss_rpn_loc: 0.005317 time: 0.6920 data_time: 0.4265 lr: 0.0007992 max_mem: 2758M [05/08 23:49:23 d2.utils.events]: eta: 0:06:37 iter: 419 total_loss: 0.6469 loss_cls: 0.1963 loss_box_reg: 0.2509 loss_mask: 0.1429 loss_rpn_cls: 0.001343 loss_rpn_loc: 0.004646 time: 0.6942 data_time: 0.4671 lr: 0.00083916 max_mem: 2758M [05/08 23:49:38 d2.utils.events]: eta: 0:06:22 iter: 439 total_loss: 0.6079 loss_cls: 0.1721 loss_box_reg: 0.2213 loss_mask: 0.1614 loss_rpn_cls: 0.002106 loss_rpn_loc: 0.006757 time: 0.6952 data_time: 0.4372 lr: 0.00087912 max_mem: 2758M [05/08 23:49:53 d2.utils.events]: eta: 0:06:09 iter: 459 total_loss: 0.5329 loss_cls: 0.1746 loss_box_reg: 0.1964 loss_mask: 0.1625 loss_rpn_cls: 0.0007963 loss_rpn_loc: 0.005657 time: 0.6980 data_time: 0.4804 lr: 0.00091908 max_mem: 2758M [05/08 23:50:06 d2.utils.events]: eta: 0:05:55 iter: 479 total_loss: 0.6376 loss_cls: 0.2231 loss_box_reg: 0.2272 loss_mask: 0.1416 loss_rpn_cls: 0.000534 loss_rpn_loc: 0.005627 time: 0.6962 data_time: 0.3852 lr: 0.00095904 max_mem: 2758M [05/08 23:50:19 d2.utils.events]: eta: 0:05:41 iter: 499 total_loss: 0.5207 loss_cls: 0.1565 loss_box_reg: 0.2171 loss_mask: 0.1651 loss_rpn_cls: 0.001735 loss_rpn_loc: 0.008417 time: 0.6955 data_time: 0.3949 lr: 0.000999 max_mem: 2758M [05/08 23:50:32 d2.utils.events]: eta: 
0:05:27 iter: 519 total_loss: 0.6803 loss_cls: 0.1874 loss_box_reg: 0.2232 loss_mask: 0.211 loss_rpn_cls: 0.001165 loss_rpn_loc: 0.006802 time: 0.6938 data_time: 0.3818 lr: 0.001039 max_mem: 2758M [05/08 23:50:46 d2.utils.events]: eta: 0:05:13 iter: 539 total_loss: 0.6538 loss_cls: 0.2187 loss_box_reg: 0.2295 loss_mask: 0.1532 loss_rpn_cls: 0.001852 loss_rpn_loc: 0.005097 time: 0.6929 data_time: 0.3954 lr: 0.0010789 max_mem: 2758M [05/08 23:50:59 d2.utils.events]: eta: 0:04:59 iter: 559 total_loss: 0.607 loss_cls: 0.179 loss_box_reg: 0.1992 loss_mask: 0.1563 loss_rpn_cls: 0.00148 loss_rpn_loc: 0.004432 time: 0.6922 data_time: 0.3964 lr: 0.0011189 max_mem: 2758M [05/08 23:51:14 d2.utils.events]: eta: 0:04:46 iter: 579 total_loss: 0.6847 loss_cls: 0.2268 loss_box_reg: 0.2463 loss_mask: 0.1967 loss_rpn_cls: 0.002075 loss_rpn_loc: 0.006982 time: 0.6931 data_time: 0.4470 lr: 0.0011588 max_mem: 2758M [05/08 23:51:27 d2.utils.events]: eta: 0:04:32 iter: 599 total_loss: 0.6654 loss_cls: 0.2124 loss_box_reg: 0.264 loss_mask: 0.1868 loss_rpn_cls: 0.002221 loss_rpn_loc: 0.006894 time: 0.6924 data_time: 0.3940 lr: 0.0011988 max_mem: 2758M [05/08 23:51:40 d2.utils.events]: eta: 0:04:18 iter: 619 total_loss: 0.5272 loss_cls: 0.1701 loss_box_reg: 0.1946 loss_mask: 0.1323 loss_rpn_cls: 0.001854 loss_rpn_loc: 0.003515 time: 0.6912 data_time: 0.3696 lr: 0.0012388 max_mem: 2758M [05/08 23:51:54 d2.utils.events]: eta: 0:04:04 iter: 639 total_loss: 0.586 loss_cls: 0.1475 loss_box_reg: 0.2384 loss_mask: 0.1607 loss_rpn_cls: 0.0008062 loss_rpn_loc: 0.005421 time: 0.6906 data_time: 0.4001 lr: 0.0012787 max_mem: 2758M [05/08 23:52:07 d2.utils.events]: eta: 0:03:51 iter: 659 total_loss: 0.5878 loss_cls: 0.1659 loss_box_reg: 0.2227 loss_mask: 0.1995 loss_rpn_cls: 0.003757 loss_rpn_loc: 0.004428 time: 0.6896 data_time: 0.3835 lr: 0.0013187 max_mem: 2758M [05/08 23:52:20 d2.utils.events]: eta: 0:03:37 iter: 679 total_loss: 0.6112 loss_cls: 0.1709 loss_box_reg: 0.2152 loss_mask: 0.1502 
loss_rpn_cls: 0.000707 loss_rpn_loc: 0.005767 time: 0.6881 data_time: 0.3700 lr: 0.0013586 max_mem: 2758M [05/08 23:52:34 d2.utils.events]: eta: 0:03:23 iter: 699 total_loss: 0.567 loss_cls: 0.2016 loss_box_reg: 0.1862 loss_mask: 0.1611 loss_rpn_cls: 0.00108 loss_rpn_loc: 0.005898 time: 0.6886 data_time: 0.4387 lr: 0.0013986 max_mem: 2758M [05/08 23:52:48 d2.utils.events]: eta: 0:03:09 iter: 719 total_loss: 0.5998 loss_cls: 0.1429 loss_box_reg: 0.2213 loss_mask: 0.1731 loss_rpn_cls: 0.001619 loss_rpn_loc: 0.007896 time: 0.6886 data_time: 0.4136 lr: 0.0014386 max_mem: 2758M [05/08 23:53:01 d2.utils.events]: eta: 0:02:55 iter: 739 total_loss: 0.7415 loss_cls: 0.2587 loss_box_reg: 0.259 loss_mask: 0.2077 loss_rpn_cls: 0.003711 loss_rpn_loc: 0.004918 time: 0.6874 data_time: 0.3692 lr: 0.0014785 max_mem: 2758M [05/08 23:53:14 d2.utils.events]: eta: 0:02:42 iter: 759 total_loss: 0.5897 loss_cls: 0.1851 loss_box_reg: 0.2103 loss_mask: 0.1915 loss_rpn_cls: 0.001144 loss_rpn_loc: 0.00487 time: 0.6873 data_time: 0.4155 lr: 0.0015185 max_mem: 2758M [05/08 23:53:27 d2.utils.events]: eta: 0:02:28 iter: 779 total_loss: 0.6183 loss_cls: 0.1405 loss_box_reg: 0.2124 loss_mask: 0.1609 loss_rpn_cls: 0.002169 loss_rpn_loc: 0.006691 time: 0.6866 data_time: 0.3883 lr: 0.0015584 max_mem: 2758M [05/08 23:53:41 d2.utils.events]: eta: 0:02:15 iter: 799 total_loss: 0.6244 loss_cls: 0.1784 loss_box_reg: 0.2373 loss_mask: 0.1556 loss_rpn_cls: 0.001868 loss_rpn_loc: 0.00604 time: 0.6860 data_time: 0.3901 lr: 0.0015984 max_mem: 2758M [05/08 23:53:54 d2.utils.events]: eta: 0:02:01 iter: 819 total_loss: 0.532 loss_cls: 0.1634 loss_box_reg: 0.1875 loss_mask: 0.1526 loss_rpn_cls: 0.001021 loss_rpn_loc: 0.004913 time: 0.6850 data_time: 0.3742 lr: 0.0016384 max_mem: 2758M [05/08 23:54:07 d2.utils.events]: eta: 0:01:47 iter: 839 total_loss: 0.5917 loss_cls: 0.1534 loss_box_reg: 0.2076 loss_mask: 0.1468 loss_rpn_cls: 0.001153 loss_rpn_loc: 0.006929 time: 0.6842 data_time: 0.3797 lr: 0.0016783 max_mem: 
2758M [05/08 23:54:19 d2.utils.events]: eta: 0:01:34 iter: 859 total_loss: 0.6272 loss_cls: 0.2112 loss_box_reg: 0.2271 loss_mask: 0.1577 loss_rpn_cls: 0.002351 loss_rpn_loc: 0.006446 time: 0.6824 data_time: 0.3372 lr: 0.0017183 max_mem: 2758M [05/08 23:54:32 d2.utils.events]: eta: 0:01:20 iter: 879 total_loss: 0.5061 loss_cls: 0.1737 loss_box_reg: 0.1993 loss_mask: 0.1525 loss_rpn_cls: 0.001229 loss_rpn_loc: 0.005237 time: 0.6817 data_time: 0.3815 lr: 0.0017582 max_mem: 2758M [05/08 23:54:46 d2.utils.events]: eta: 0:01:07 iter: 899 total_loss: 0.6134 loss_cls: 0.1872 loss_box_reg: 0.2271 loss_mask: 0.156 loss_rpn_cls: 0.0004449 loss_rpn_loc: 0.006331 time: 0.6822 data_time: 0.4234 lr: 0.0017982 max_mem: 2758M [05/08 23:55:00 d2.utils.events]: eta: 0:00:53 iter: 919 total_loss: 0.6265 loss_cls: 0.2008 loss_box_reg: 0.2045 loss_mask: 0.1336 loss_rpn_cls: 0.001015 loss_rpn_loc: 0.005531 time: 0.6823 data_time: 0.4159 lr: 0.0018382 max_mem: 2758M [05/08 23:55:13 d2.utils.events]: eta: 0:00:40 iter: 939 total_loss: 0.5976 loss_cls: 0.2227 loss_box_reg: 0.1932 loss_mask: 0.1524 loss_rpn_cls: 0.0007851 loss_rpn_loc: 0.00524 time: 0.6823 data_time: 0.4041 lr: 0.0018781 max_mem: 2758M [05/08 23:55:27 d2.utils.events]: eta: 0:00:26 iter: 959 total_loss: 0.5818 loss_cls: 0.1942 loss_box_reg: 0.1894 loss_mask: 0.1421 loss_rpn_cls: 0.001578 loss_rpn_loc: 0.005245 time: 0.6822 data_time: 0.3996 lr: 0.0019181 max_mem: 2758M [05/08 23:55:38 d2.utils.events]: eta: 0:00:13 iter: 979 total_loss: 0.6051 loss_cls: 0.2029 loss_box_reg: 0.2244 loss_mask: 0.1565 loss_rpn_cls: 0.001146 loss_rpn_loc: 0.006171 time: 0.6794 data_time: 0.2656 lr: 0.001958 max_mem: 2758M [05/08 23:55:45 d2.utils.events]: eta: 0:00:00 iter: 999 total_loss: 0.5994 loss_cls: 0.17 loss_box_reg: 0.2439 loss_mask: 0.1669 loss_rpn_cls: 0.001093 loss_rpn_loc: 0.006357 time: 0.6719 data_time: 0.0140 lr: 0.001998 max_mem: 2758M [05/08 23:55:45 d2.engine.hooks]: Overall training speed: 998 iterations in 0:11:10 (0.6719 s 
/ it) [05/08 23:55:45 d2.engine.hooks]: Total training time: 0:11:12 (0:00:02 on hooks)
Now, we perform inference with the trained model on the V-COCO actions validation set. First, let's create a predictor using the model we just trained:
# Switch the config from training to inference: point at the weights we just
# trained, set a confidence cutoff, and target the validation split.
cfg.MODEL.WEIGHTS = os.path.join(cfg.OUTPUT_DIR, "model_final.pth")
cfg.MODEL.ROI_HEADS.SCORE_THRESH_TEST = 0.5  # drop low-confidence detections
cfg.DATASETS.TEST = ("rel_actions_val3",)
predictor = DefaultPredictor(cfg)
[05/09 00:15:16 d2.checkpoint.c2_model_loading]: Following weights matched with model:
| Names in Model | Names in Checkpoint | Shapes |
|:------------------------------------------------|:-----------------------------------------------------------------------------------------------------|:------------------------------------------------|
| backbone.bottom_up.res2.0.conv1.* | backbone.bottom_up.res2.0.conv1.{norm.bias,norm.running_mean,norm.running_var,norm.weight,weight} | (64,) (64,) (64,) (64,) (64,64,1,1) |
| backbone.bottom_up.res2.0.conv2.* | backbone.bottom_up.res2.0.conv2.{norm.bias,norm.running_mean,norm.running_var,norm.weight,weight} | (64,) (64,) (64,) (64,) (64,64,3,3) |
| backbone.bottom_up.res2.0.conv3.* | backbone.bottom_up.res2.0.conv3.{norm.bias,norm.running_mean,norm.running_var,norm.weight,weight} | (256,) (256,) (256,) (256,) (256,64,1,1) |
| backbone.bottom_up.res2.0.shortcut.* | backbone.bottom_up.res2.0.shortcut.{norm.bias,norm.running_mean,norm.running_var,norm.weight,weight} | (256,) (256,) (256,) (256,) (256,64,1,1) |
| backbone.bottom_up.res2.1.conv1.* | backbone.bottom_up.res2.1.conv1.{norm.bias,norm.running_mean,norm.running_var,norm.weight,weight} | (64,) (64,) (64,) (64,) (64,256,1,1) |
| backbone.bottom_up.res2.1.conv2.* | backbone.bottom_up.res2.1.conv2.{norm.bias,norm.running_mean,norm.running_var,norm.weight,weight} | (64,) (64,) (64,) (64,) (64,64,3,3) |
| backbone.bottom_up.res2.1.conv3.* | backbone.bottom_up.res2.1.conv3.{norm.bias,norm.running_mean,norm.running_var,norm.weight,weight} | (256,) (256,) (256,) (256,) (256,64,1,1) |
| backbone.bottom_up.res2.2.conv1.* | backbone.bottom_up.res2.2.conv1.{norm.bias,norm.running_mean,norm.running_var,norm.weight,weight} | (64,) (64,) (64,) (64,) (64,256,1,1) |
| backbone.bottom_up.res2.2.conv2.* | backbone.bottom_up.res2.2.conv2.{norm.bias,norm.running_mean,norm.running_var,norm.weight,weight} | (64,) (64,) (64,) (64,) (64,64,3,3) |
| backbone.bottom_up.res2.2.conv3.* | backbone.bottom_up.res2.2.conv3.{norm.bias,norm.running_mean,norm.running_var,norm.weight,weight} | (256,) (256,) (256,) (256,) (256,64,1,1) |
| backbone.bottom_up.res3.0.conv1.* | backbone.bottom_up.res3.0.conv1.{norm.bias,norm.running_mean,norm.running_var,norm.weight,weight} | (128,) (128,) (128,) (128,) (128,256,1,1) |
| backbone.bottom_up.res3.0.conv2.* | backbone.bottom_up.res3.0.conv2.{norm.bias,norm.running_mean,norm.running_var,norm.weight,weight} | (128,) (128,) (128,) (128,) (128,128,3,3) |
| backbone.bottom_up.res3.0.conv3.* | backbone.bottom_up.res3.0.conv3.{norm.bias,norm.running_mean,norm.running_var,norm.weight,weight} | (512,) (512,) (512,) (512,) (512,128,1,1) |
| backbone.bottom_up.res3.0.shortcut.* | backbone.bottom_up.res3.0.shortcut.{norm.bias,norm.running_mean,norm.running_var,norm.weight,weight} | (512,) (512,) (512,) (512,) (512,256,1,1) |
| backbone.bottom_up.res3.1.conv1.* | backbone.bottom_up.res3.1.conv1.{norm.bias,norm.running_mean,norm.running_var,norm.weight,weight} | (128,) (128,) (128,) (128,) (128,512,1,1) |
| backbone.bottom_up.res3.1.conv2.* | backbone.bottom_up.res3.1.conv2.{norm.bias,norm.running_mean,norm.running_var,norm.weight,weight} | (128,) (128,) (128,) (128,) (128,128,3,3) |
| backbone.bottom_up.res3.1.conv3.* | backbone.bottom_up.res3.1.conv3.{norm.bias,norm.running_mean,norm.running_var,norm.weight,weight} | (512,) (512,) (512,) (512,) (512,128,1,1) |
| backbone.bottom_up.res3.2.conv1.* | backbone.bottom_up.res3.2.conv1.{norm.bias,norm.running_mean,norm.running_var,norm.weight,weight} | (128,) (128,) (128,) (128,) (128,512,1,1) |
| backbone.bottom_up.res3.2.conv2.* | backbone.bottom_up.res3.2.conv2.{norm.bias,norm.running_mean,norm.running_var,norm.weight,weight} | (128,) (128,) (128,) (128,) (128,128,3,3) |
| backbone.bottom_up.res3.2.conv3.* | backbone.bottom_up.res3.2.conv3.{norm.bias,norm.running_mean,norm.running_var,norm.weight,weight} | (512,) (512,) (512,) (512,) (512,128,1,1) |
| backbone.bottom_up.res3.3.conv1.* | backbone.bottom_up.res3.3.conv1.{norm.bias,norm.running_mean,norm.running_var,norm.weight,weight} | (128,) (128,) (128,) (128,) (128,512,1,1) |
| backbone.bottom_up.res3.3.conv2.* | backbone.bottom_up.res3.3.conv2.{norm.bias,norm.running_mean,norm.running_var,norm.weight,weight} | (128,) (128,) (128,) (128,) (128,128,3,3) |
| backbone.bottom_up.res3.3.conv3.* | backbone.bottom_up.res3.3.conv3.{norm.bias,norm.running_mean,norm.running_var,norm.weight,weight} | (512,) (512,) (512,) (512,) (512,128,1,1) |
| backbone.bottom_up.res4.0.conv1.* | backbone.bottom_up.res4.0.conv1.{norm.bias,norm.running_mean,norm.running_var,norm.weight,weight} | (256,) (256,) (256,) (256,) (256,512,1,1) |
| backbone.bottom_up.res4.0.conv2.* | backbone.bottom_up.res4.0.conv2.{norm.bias,norm.running_mean,norm.running_var,norm.weight,weight} | (256,) (256,) (256,) (256,) (256,256,3,3) |
| backbone.bottom_up.res4.0.conv3.* | backbone.bottom_up.res4.0.conv3.{norm.bias,norm.running_mean,norm.running_var,norm.weight,weight} | (1024,) (1024,) (1024,) (1024,) (1024,256,1,1) |
| backbone.bottom_up.res4.0.shortcut.* | backbone.bottom_up.res4.0.shortcut.{norm.bias,norm.running_mean,norm.running_var,norm.weight,weight} | (1024,) (1024,) (1024,) (1024,) (1024,512,1,1) |
| backbone.bottom_up.res4.1.conv1.* | backbone.bottom_up.res4.1.conv1.{norm.bias,norm.running_mean,norm.running_var,norm.weight,weight} | (256,) (256,) (256,) (256,) (256,1024,1,1) |
| backbone.bottom_up.res4.1.conv2.* | backbone.bottom_up.res4.1.conv2.{norm.bias,norm.running_mean,norm.running_var,norm.weight,weight} | (256,) (256,) (256,) (256,) (256,256,3,3) |
| backbone.bottom_up.res4.1.conv3.* | backbone.bottom_up.res4.1.conv3.{norm.bias,norm.running_mean,norm.running_var,norm.weight,weight} | (1024,) (1024,) (1024,) (1024,) (1024,256,1,1) |
| backbone.bottom_up.res4.2.conv1.* | backbone.bottom_up.res4.2.conv1.{norm.bias,norm.running_mean,norm.running_var,norm.weight,weight} | (256,) (256,) (256,) (256,) (256,1024,1,1) |
| backbone.bottom_up.res4.2.conv2.* | backbone.bottom_up.res4.2.conv2.{norm.bias,norm.running_mean,norm.running_var,norm.weight,weight} | (256,) (256,) (256,) (256,) (256,256,3,3) |
| backbone.bottom_up.res4.2.conv3.* | backbone.bottom_up.res4.2.conv3.{norm.bias,norm.running_mean,norm.running_var,norm.weight,weight} | (1024,) (1024,) (1024,) (1024,) (1024,256,1,1) |
| backbone.bottom_up.res4.3.conv1.* | backbone.bottom_up.res4.3.conv1.{norm.bias,norm.running_mean,norm.running_var,norm.weight,weight} | (256,) (256,) (256,) (256,) (256,1024,1,1) |
| backbone.bottom_up.res4.3.conv2.* | backbone.bottom_up.res4.3.conv2.{norm.bias,norm.running_mean,norm.running_var,norm.weight,weight} | (256,) (256,) (256,) (256,) (256,256,3,3) |
| backbone.bottom_up.res4.3.conv3.* | backbone.bottom_up.res4.3.conv3.{norm.bias,norm.running_mean,norm.running_var,norm.weight,weight} | (1024,) (1024,) (1024,) (1024,) (1024,256,1,1) |
| backbone.bottom_up.res4.4.conv1.* | backbone.bottom_up.res4.4.conv1.{norm.bias,norm.running_mean,norm.running_var,norm.weight,weight} | (256,) (256,) (256,) (256,) (256,1024,1,1) |
| backbone.bottom_up.res4.4.conv2.* | backbone.bottom_up.res4.4.conv2.{norm.bias,norm.running_mean,norm.running_var,norm.weight,weight} | (256,) (256,) (256,) (256,) (256,256,3,3) |
| backbone.bottom_up.res4.4.conv3.* | backbone.bottom_up.res4.4.conv3.{norm.bias,norm.running_mean,norm.running_var,norm.weight,weight} | (1024,) (1024,) (1024,) (1024,) (1024,256,1,1) |
| backbone.bottom_up.res4.5.conv1.* | backbone.bottom_up.res4.5.conv1.{norm.bias,norm.running_mean,norm.running_var,norm.weight,weight} | (256,) (256,) (256,) (256,) (256,1024,1,1) |
| backbone.bottom_up.res4.5.conv2.* | backbone.bottom_up.res4.5.conv2.{norm.bias,norm.running_mean,norm.running_var,norm.weight,weight} | (256,) (256,) (256,) (256,) (256,256,3,3) |
| backbone.bottom_up.res4.5.conv3.* | backbone.bottom_up.res4.5.conv3.{norm.bias,norm.running_mean,norm.running_var,norm.weight,weight} | (1024,) (1024,) (1024,) (1024,) (1024,256,1,1) |
| backbone.bottom_up.res5.0.conv1.* | backbone.bottom_up.res5.0.conv1.{norm.bias,norm.running_mean,norm.running_var,norm.weight,weight} | (512,) (512,) (512,) (512,) (512,1024,1,1) |
| backbone.bottom_up.res5.0.conv2.* | backbone.bottom_up.res5.0.conv2.{norm.bias,norm.running_mean,norm.running_var,norm.weight,weight} | (512,) (512,) (512,) (512,) (512,512,3,3) |
| backbone.bottom_up.res5.0.conv3.* | backbone.bottom_up.res5.0.conv3.{norm.bias,norm.running_mean,norm.running_var,norm.weight,weight} | (2048,) (2048,) (2048,) (2048,) (2048,512,1,1) |
| backbone.bottom_up.res5.0.shortcut.* | backbone.bottom_up.res5.0.shortcut.{norm.bias,norm.running_mean,norm.running_var,norm.weight,weight} | (2048,) (2048,) (2048,) (2048,) (2048,1024,1,1) |
| backbone.bottom_up.res5.1.conv1.* | backbone.bottom_up.res5.1.conv1.{norm.bias,norm.running_mean,norm.running_var,norm.weight,weight} | (512,) (512,) (512,) (512,) (512,2048,1,1) |
| backbone.bottom_up.res5.1.conv2.* | backbone.bottom_up.res5.1.conv2.{norm.bias,norm.running_mean,norm.running_var,norm.weight,weight} | (512,) (512,) (512,) (512,) (512,512,3,3) |
| backbone.bottom_up.res5.1.conv3.* | backbone.bottom_up.res5.1.conv3.{norm.bias,norm.running_mean,norm.running_var,norm.weight,weight} | (2048,) (2048,) (2048,) (2048,) (2048,512,1,1) |
| backbone.bottom_up.res5.2.conv1.* | backbone.bottom_up.res5.2.conv1.{norm.bias,norm.running_mean,norm.running_var,norm.weight,weight} | (512,) (512,) (512,) (512,) (512,2048,1,1) |
| backbone.bottom_up.res5.2.conv2.* | backbone.bottom_up.res5.2.conv2.{norm.bias,norm.running_mean,norm.running_var,norm.weight,weight} | (512,) (512,) (512,) (512,) (512,512,3,3) |
| backbone.bottom_up.res5.2.conv3.* | backbone.bottom_up.res5.2.conv3.{norm.bias,norm.running_mean,norm.running_var,norm.weight,weight} | (2048,) (2048,) (2048,) (2048,) (2048,512,1,1) |
| backbone.bottom_up.stem.conv1.* | backbone.bottom_up.stem.conv1.{norm.bias,norm.running_mean,norm.running_var,norm.weight,weight} | (64,) (64,) (64,) (64,) (64,3,7,7) |
| backbone.fpn_lateral2.* | backbone.fpn_lateral2.{bias,weight} | (256,) (256,256,1,1) |
| backbone.fpn_lateral3.* | backbone.fpn_lateral3.{bias,weight} | (256,) (256,512,1,1) |
| backbone.fpn_lateral4.* | backbone.fpn_lateral4.{bias,weight} | (256,) (256,1024,1,1) |
| backbone.fpn_lateral5.* | backbone.fpn_lateral5.{bias,weight} | (256,) (256,2048,1,1) |
| backbone.fpn_output2.* | backbone.fpn_output2.{bias,weight} | (256,) (256,256,3,3) |
| backbone.fpn_output3.* | backbone.fpn_output3.{bias,weight} | (256,) (256,256,3,3) |
| backbone.fpn_output4.* | backbone.fpn_output4.{bias,weight} | (256,) (256,256,3,3) |
| backbone.fpn_output5.* | backbone.fpn_output5.{bias,weight} | (256,) (256,256,3,3) |
| proposal_generator.rpn_head.anchor_deltas.* | proposal_generator.rpn_head.anchor_deltas.{bias,weight} | (12,) (12,256,1,1) |
| proposal_generator.rpn_head.conv.* | proposal_generator.rpn_head.conv.{bias,weight} | (256,) (256,256,3,3) |
| proposal_generator.rpn_head.objectness_logits.* | proposal_generator.rpn_head.objectness_logits.{bias,weight} | (3,) (3,256,1,1) |
| roi_heads.box_head.fc1.* | roi_heads.box_head.fc1.{bias,weight} | (1024,) (1024,12544) |
| roi_heads.box_head.fc2.* | roi_heads.box_head.fc2.{bias,weight} | (1024,) (1024,1024) |
| roi_heads.box_predictor.bbox_pred.* | roi_heads.box_predictor.bbox_pred.{bias,weight} | (28,) (28,1024) |
| roi_heads.box_predictor.cls_score.* | roi_heads.box_predictor.cls_score.{bias,weight} | (8,) (8,1024) |
| roi_heads.mask_head.deconv.* | roi_heads.mask_head.deconv.{bias,weight} | (256,) (256,256,2,2) |
| roi_heads.mask_head.mask_fcn1.* | roi_heads.mask_head.mask_fcn1.{bias,weight} | (256,) (256,256,3,3) |
| roi_heads.mask_head.mask_fcn2.* | roi_heads.mask_head.mask_fcn2.{bias,weight} | (256,) (256,256,3,3) |
| roi_heads.mask_head.mask_fcn3.* | roi_heads.mask_head.mask_fcn3.{bias,weight} | (256,) (256,256,3,3) |
| roi_heads.mask_head.mask_fcn4.* | roi_heads.mask_head.mask_fcn4.{bias,weight} | (256,) (256,256,3,3) |
| roi_heads.mask_head.predictor.* | roi_heads.mask_head.predictor.{bias,weight} | (7,) (7,256,1,1) |
Next, we randomly select a few training samples and visualize the model's predictions on them.
from detectron2.utils.visualizer import ColorMode

# Sanity-check the trained model by drawing its predictions on a few
# randomly chosen training images.
samples = random.sample(train_dicts, 3)
for sample in samples:
    image = cv2.imread(sample["file_name"])
    prediction = predictor(image)
    # Visualizer expects RGB; cv2.imread returns BGR, hence the channel flip.
    viz = Visualizer(
        image[:, :, ::-1],
        metadata=train_metadata,
        scale=0.8,
        instance_mode=ColorMode.IMAGE_BW,  # remove the colors of unsegmented pixels
    )
    viz = viz.draw_instance_predictions(prediction["instances"].to("cpu"))
    # Flip back to BGR for display with cv2_imshow.
    cv2_imshow(viz.get_image()[:, :, ::-1])
from detectron2.evaluation import COCOEvaluator, inference_on_dataset
from detectron2.data import build_detection_test_loader
from detectron2.engine import DefaultTrainer

# Evaluate the trained model on the validation split with COCO-style metrics.
# FIX: instantiating COCOEvaluator with the whole cfg (old positional form
# `COCOEvaluator(name, cfg, False, ...)`) is deprecated — detectron2 warns
# "Please pass in explicit arguments instead" at runtime. The explicit form
# below is equivalent: tasks are inferred from the dataset, and we keep
# distributed=False as before (single-process evaluation).
evaluator = COCOEvaluator(
    "rel_actions_val3",
    distributed=False,
    output_dir="./output_evals/",
)
val_loader = build_detection_test_loader(cfg, "rel_actions_val3")
# `trainer` is the DefaultTrainer built in an earlier notebook cell;
# its .model is the network being evaluated.
inference_on_dataset(trainer.model, val_loader, evaluator)
WARNING [05/09 00:15:25 d2.evaluation.coco_evaluation]: COCO Evaluator instantiated using config, this is deprecated behavior. Please pass in explicit arguments instead. [05/09 00:15:25 d2.data.datasets.coco]: Loaded 2215 images in COCO format from ./data/instances_vcoco_actions_val_2014.json [05/09 00:15:26 d2.data.build]: Distribution of instances among all 7 categories: | category | #instances | category | #instances | category | #instances | |:-------------:|:-------------|:----------:|:-------------|:----------:|:-------------| | hold | 2163 | carry | 262 | point | 23 | | eat | 376 | drink | 70 | stand | 2448 | | talk_on_phone | 187 | | | | | | total | 5529 | | | | | [05/09 00:15:26 d2.data.dataset_mapper]: [DatasetMapper] Augmentations used in inference: [ResizeShortestEdge(short_edge_length=(800, 800), max_size=1333, sample_style='choice')] [05/09 00:15:26 d2.data.common]: Serializing 2215 elements to byte tensors and concatenating them all ... [05/09 00:15:26 d2.data.common]: Serialized dataset takes 5.66 MiB [05/09 00:15:26 d2.evaluation.evaluator]: Start inference on 2215 batches [05/09 00:15:30 d2.evaluation.evaluator]: Inference done 11/2215. Dataloading: 0.1800 s/iter. Inference: 0.0651 s/iter. Eval: 0.0063 s/iter. Total: 0.2513 s/iter. ETA=0:09:13 [05/09 00:15:35 d2.evaluation.evaluator]: Inference done 28/2215. Dataloading: 0.2276 s/iter. Inference: 0.0669 s/iter. Eval: 0.0073 s/iter. Total: 0.3020 s/iter. ETA=0:11:00 [05/09 00:15:41 d2.evaluation.evaluator]: Inference done 44/2215. Dataloading: 0.2372 s/iter. Inference: 0.0679 s/iter. Eval: 0.0079 s/iter. Total: 0.3132 s/iter. ETA=0:11:19 [05/09 00:15:46 d2.evaluation.evaluator]: Inference done 58/2215. Dataloading: 0.2552 s/iter. Inference: 0.0675 s/iter. Eval: 0.0081 s/iter. Total: 0.3310 s/iter. ETA=0:11:53 [05/09 00:15:51 d2.evaluation.evaluator]: Inference done 73/2215. Dataloading: 0.2561 s/iter. Inference: 0.0684 s/iter. Eval: 0.0080 s/iter. Total: 0.3326 s/iter. 
ETA=0:11:52 [05/09 00:15:56 d2.evaluation.evaluator]: Inference done 88/2215. Dataloading: 0.2577 s/iter. Inference: 0.0684 s/iter. Eval: 0.0081 s/iter. Total: 0.3345 s/iter. ETA=0:11:51 [05/09 00:16:02 d2.evaluation.evaluator]: Inference done 104/2215. Dataloading: 0.2600 s/iter. Inference: 0.0681 s/iter. Eval: 0.0088 s/iter. Total: 0.3371 s/iter. ETA=0:11:51 [05/09 00:16:07 d2.evaluation.evaluator]: Inference done 120/2215. Dataloading: 0.2593 s/iter. Inference: 0.0681 s/iter. Eval: 0.0088 s/iter. Total: 0.3363 s/iter. ETA=0:11:44 [05/09 00:16:13 d2.evaluation.evaluator]: Inference done 136/2215. Dataloading: 0.2606 s/iter. Inference: 0.0679 s/iter. Eval: 0.0087 s/iter. Total: 0.3374 s/iter. ETA=0:11:41 [05/09 00:16:18 d2.evaluation.evaluator]: Inference done 152/2215. Dataloading: 0.2592 s/iter. Inference: 0.0680 s/iter. Eval: 0.0089 s/iter. Total: 0.3363 s/iter. ETA=0:11:33 [05/09 00:16:23 d2.evaluation.evaluator]: Inference done 168/2215. Dataloading: 0.2594 s/iter. Inference: 0.0679 s/iter. Eval: 0.0086 s/iter. Total: 0.3360 s/iter. ETA=0:11:27 [05/09 00:16:29 d2.evaluation.evaluator]: Inference done 184/2215. Dataloading: 0.2593 s/iter. Inference: 0.0680 s/iter. Eval: 0.0087 s/iter. Total: 0.3362 s/iter. ETA=0:11:22 [05/09 00:16:34 d2.evaluation.evaluator]: Inference done 200/2215. Dataloading: 0.2593 s/iter. Inference: 0.0680 s/iter. Eval: 0.0087 s/iter. Total: 0.3363 s/iter. ETA=0:11:17 [05/09 00:16:39 d2.evaluation.evaluator]: Inference done 216/2215. Dataloading: 0.2586 s/iter. Inference: 0.0681 s/iter. Eval: 0.0088 s/iter. Total: 0.3357 s/iter. ETA=0:11:11 [05/09 00:16:44 d2.evaluation.evaluator]: Inference done 231/2215. Dataloading: 0.2597 s/iter. Inference: 0.0682 s/iter. Eval: 0.0087 s/iter. Total: 0.3368 s/iter. ETA=0:11:08 [05/09 00:16:50 d2.evaluation.evaluator]: Inference done 246/2215. Dataloading: 0.2601 s/iter. Inference: 0.0681 s/iter. Eval: 0.0086 s/iter. Total: 0.3370 s/iter. 
ETA=0:11:03 [05/09 00:16:55 d2.evaluation.evaluator]: Inference done 262/2215. Dataloading: 0.2603 s/iter. Inference: 0.0682 s/iter. Eval: 0.0085 s/iter. Total: 0.3371 s/iter. ETA=0:10:58 [05/09 00:17:00 d2.evaluation.evaluator]: Inference done 276/2215. Dataloading: 0.2617 s/iter. Inference: 0.0682 s/iter. Eval: 0.0085 s/iter. Total: 0.3387 s/iter. ETA=0:10:56 [05/09 00:17:05 d2.evaluation.evaluator]: Inference done 289/2215. Dataloading: 0.2638 s/iter. Inference: 0.0683 s/iter. Eval: 0.0086 s/iter. Total: 0.3409 s/iter. ETA=0:10:56 [05/09 00:17:10 d2.evaluation.evaluator]: Inference done 305/2215. Dataloading: 0.2627 s/iter. Inference: 0.0682 s/iter. Eval: 0.0086 s/iter. Total: 0.3397 s/iter. ETA=0:10:48 [05/09 00:17:15 d2.evaluation.evaluator]: Inference done 319/2215. Dataloading: 0.2638 s/iter. Inference: 0.0682 s/iter. Eval: 0.0085 s/iter. Total: 0.3407 s/iter. ETA=0:10:45 [05/09 00:17:21 d2.evaluation.evaluator]: Inference done 334/2215. Dataloading: 0.2653 s/iter. Inference: 0.0681 s/iter. Eval: 0.0084 s/iter. Total: 0.3420 s/iter. ETA=0:10:43 [05/09 00:17:26 d2.evaluation.evaluator]: Inference done 348/2215. Dataloading: 0.2671 s/iter. Inference: 0.0681 s/iter. Eval: 0.0083 s/iter. Total: 0.3438 s/iter. ETA=0:10:41 [05/09 00:17:50 d2.evaluation.evaluator]: Inference done 359/2215. Dataloading: 0.2652 s/iter. Inference: 0.1269 s/iter. Eval: 0.0084 s/iter. Total: 0.4007 s/iter. ETA=0:12:23 [05/09 00:17:55 d2.evaluation.evaluator]: Inference done 379/2215. Dataloading: 0.2603 s/iter. Inference: 0.1238 s/iter. Eval: 0.0084 s/iter. Total: 0.3927 s/iter. ETA=0:12:00 [05/09 00:18:01 d2.evaluation.evaluator]: Inference done 394/2215. Dataloading: 0.2616 s/iter. Inference: 0.1216 s/iter. Eval: 0.0083 s/iter. Total: 0.3917 s/iter. ETA=0:11:53 [05/09 00:18:06 d2.evaluation.evaluator]: Inference done 410/2215. Dataloading: 0.2620 s/iter. Inference: 0.1195 s/iter. Eval: 0.0083 s/iter. Total: 0.3900 s/iter. 
ETA=0:11:43 [05/09 00:18:12 d2.evaluation.evaluator]: Inference done 425/2215. Dataloading: 0.2632 s/iter. Inference: 0.1177 s/iter. Eval: 0.0082 s/iter. Total: 0.3893 s/iter. ETA=0:11:36 [05/09 00:18:17 d2.evaluation.evaluator]: Inference done 440/2215. Dataloading: 0.2637 s/iter. Inference: 0.1160 s/iter. Eval: 0.0082 s/iter. Total: 0.3881 s/iter. ETA=0:11:28 [05/09 00:18:23 d2.evaluation.evaluator]: Inference done 456/2215. Dataloading: 0.2634 s/iter. Inference: 0.1143 s/iter. Eval: 0.0083 s/iter. Total: 0.3862 s/iter. ETA=0:11:19 [05/09 00:18:28 d2.evaluation.evaluator]: Inference done 472/2215. Dataloading: 0.2626 s/iter. Inference: 0.1127 s/iter. Eval: 0.0083 s/iter. Total: 0.3838 s/iter. ETA=0:11:09 [05/09 00:18:33 d2.evaluation.evaluator]: Inference done 487/2215. Dataloading: 0.2627 s/iter. Inference: 0.1113 s/iter. Eval: 0.0083 s/iter. Total: 0.3825 s/iter. ETA=0:11:00 [05/09 00:18:38 d2.evaluation.evaluator]: Inference done 502/2215. Dataloading: 0.2631 s/iter. Inference: 0.1100 s/iter. Eval: 0.0083 s/iter. Total: 0.3816 s/iter. ETA=0:10:53 [05/09 00:18:44 d2.evaluation.evaluator]: Inference done 518/2215. Dataloading: 0.2638 s/iter. Inference: 0.1087 s/iter. Eval: 0.0083 s/iter. Total: 0.3810 s/iter. ETA=0:10:46 [05/09 00:18:49 d2.evaluation.evaluator]: Inference done 534/2215. Dataloading: 0.2638 s/iter. Inference: 0.1074 s/iter. Eval: 0.0083 s/iter. Total: 0.3798 s/iter. ETA=0:10:38 [05/09 00:18:55 d2.evaluation.evaluator]: Inference done 552/2215. Dataloading: 0.2624 s/iter. Inference: 0.1061 s/iter. Eval: 0.0083 s/iter. Total: 0.3771 s/iter. ETA=0:10:27 [05/09 00:19:00 d2.evaluation.evaluator]: Inference done 568/2215. Dataloading: 0.2619 s/iter. Inference: 0.1051 s/iter. Eval: 0.0083 s/iter. Total: 0.3755 s/iter. ETA=0:10:18 [05/09 00:19:05 d2.evaluation.evaluator]: Inference done 583/2215. Dataloading: 0.2618 s/iter. Inference: 0.1041 s/iter. Eval: 0.0083 s/iter. Total: 0.3744 s/iter. 
ETA=0:10:11 [05/09 00:19:10 d2.evaluation.evaluator]: Inference done 598/2215. Dataloading: 0.2619 s/iter. Inference: 0.1031 s/iter. Eval: 0.0083 s/iter. Total: 0.3736 s/iter. ETA=0:10:04 [05/09 00:19:15 d2.evaluation.evaluator]: Inference done 615/2215. Dataloading: 0.2612 s/iter. Inference: 0.1022 s/iter. Eval: 0.0083 s/iter. Total: 0.3719 s/iter. ETA=0:09:54 [05/09 00:19:21 d2.evaluation.evaluator]: Inference done 632/2215. Dataloading: 0.2606 s/iter. Inference: 0.1012 s/iter. Eval: 0.0084 s/iter. Total: 0.3703 s/iter. ETA=0:09:46 [05/09 00:19:26 d2.evaluation.evaluator]: Inference done 649/2215. Dataloading: 0.2603 s/iter. Inference: 0.1003 s/iter. Eval: 0.0083 s/iter. Total: 0.3692 s/iter. ETA=0:09:38 [05/09 00:19:32 d2.evaluation.evaluator]: Inference done 665/2215. Dataloading: 0.2607 s/iter. Inference: 0.0995 s/iter. Eval: 0.0083 s/iter. Total: 0.3688 s/iter. ETA=0:09:31 [05/09 00:19:38 d2.evaluation.evaluator]: Inference done 681/2215. Dataloading: 0.2614 s/iter. Inference: 0.0988 s/iter. Eval: 0.0084 s/iter. Total: 0.3688 s/iter. ETA=0:09:25 [05/09 00:19:43 d2.evaluation.evaluator]: Inference done 695/2215. Dataloading: 0.2618 s/iter. Inference: 0.0982 s/iter. Eval: 0.0084 s/iter. Total: 0.3685 s/iter. ETA=0:09:20 [05/09 00:19:48 d2.evaluation.evaluator]: Inference done 711/2215. Dataloading: 0.2618 s/iter. Inference: 0.0975 s/iter. Eval: 0.0084 s/iter. Total: 0.3679 s/iter. ETA=0:09:13 [05/09 00:19:53 d2.evaluation.evaluator]: Inference done 727/2215. Dataloading: 0.2615 s/iter. Inference: 0.0969 s/iter. Eval: 0.0084 s/iter. Total: 0.3670 s/iter. ETA=0:09:06 [05/09 00:19:59 d2.evaluation.evaluator]: Inference done 741/2215. Dataloading: 0.2621 s/iter. Inference: 0.0964 s/iter. Eval: 0.0085 s/iter. Total: 0.3671 s/iter. ETA=0:09:01 [05/09 00:20:04 d2.evaluation.evaluator]: Inference done 757/2215. Dataloading: 0.2622 s/iter. Inference: 0.0958 s/iter. Eval: 0.0085 s/iter. Total: 0.3667 s/iter. 
ETA=0:08:54 [05/09 00:20:09 d2.evaluation.evaluator]: Inference done 773/2215. Dataloading: 0.2620 s/iter. Inference: 0.0952 s/iter. Eval: 0.0085 s/iter. Total: 0.3659 s/iter. ETA=0:08:47 [05/09 00:20:15 d2.evaluation.evaluator]: Inference done 788/2215. Dataloading: 0.2621 s/iter. Inference: 0.0947 s/iter. Eval: 0.0085 s/iter. Total: 0.3655 s/iter. ETA=0:08:41 [05/09 00:20:20 d2.evaluation.evaluator]: Inference done 804/2215. Dataloading: 0.2620 s/iter. Inference: 0.0941 s/iter. Eval: 0.0085 s/iter. Total: 0.3648 s/iter. ETA=0:08:34 [05/09 00:20:25 d2.evaluation.evaluator]: Inference done 820/2215. Dataloading: 0.2617 s/iter. Inference: 0.0936 s/iter. Eval: 0.0085 s/iter. Total: 0.3640 s/iter. ETA=0:08:27 [05/09 00:20:30 d2.evaluation.evaluator]: Inference done 836/2215. Dataloading: 0.2615 s/iter. Inference: 0.0930 s/iter. Eval: 0.0085 s/iter. Total: 0.3632 s/iter. ETA=0:08:20 [05/09 00:20:35 d2.evaluation.evaluator]: Inference done 850/2215. Dataloading: 0.2620 s/iter. Inference: 0.0926 s/iter. Eval: 0.0084 s/iter. Total: 0.3633 s/iter. ETA=0:08:15 [05/09 00:20:40 d2.evaluation.evaluator]: Inference done 865/2215. Dataloading: 0.2619 s/iter. Inference: 0.0922 s/iter. Eval: 0.0085 s/iter. Total: 0.3628 s/iter. ETA=0:08:09 [05/09 00:20:46 d2.evaluation.evaluator]: Inference done 879/2215. Dataloading: 0.2629 s/iter. Inference: 0.0918 s/iter. Eval: 0.0084 s/iter. Total: 0.3633 s/iter. ETA=0:08:05 [05/09 00:20:51 d2.evaluation.evaluator]: Inference done 893/2215. Dataloading: 0.2631 s/iter. Inference: 0.0914 s/iter. Eval: 0.0084 s/iter. Total: 0.3632 s/iter. ETA=0:08:00 [05/09 00:20:56 d2.evaluation.evaluator]: Inference done 907/2215. Dataloading: 0.2637 s/iter. Inference: 0.0911 s/iter. Eval: 0.0084 s/iter. Total: 0.3634 s/iter. ETA=0:07:55 [05/09 00:21:01 d2.evaluation.evaluator]: Inference done 920/2215. Dataloading: 0.2644 s/iter. Inference: 0.0907 s/iter. Eval: 0.0084 s/iter. Total: 0.3637 s/iter. 
ETA=0:07:51 [05/09 00:21:06 d2.evaluation.evaluator]: Inference done 933/2215. Dataloading: 0.2652 s/iter. Inference: 0.0904 s/iter. Eval: 0.0084 s/iter. Total: 0.3642 s/iter. ETA=0:07:46 [05/09 00:21:12 d2.evaluation.evaluator]: Inference done 949/2215. Dataloading: 0.2648 s/iter. Inference: 0.0900 s/iter. Eval: 0.0084 s/iter. Total: 0.3635 s/iter. ETA=0:07:40 [05/09 00:21:17 d2.evaluation.evaluator]: Inference done 962/2215. Dataloading: 0.2654 s/iter. Inference: 0.0898 s/iter. Eval: 0.0084 s/iter. Total: 0.3638 s/iter. ETA=0:07:35 [05/09 00:21:22 d2.evaluation.evaluator]: Inference done 977/2215. Dataloading: 0.2655 s/iter. Inference: 0.0895 s/iter. Eval: 0.0085 s/iter. Total: 0.3637 s/iter. ETA=0:07:30 [05/09 00:21:27 d2.evaluation.evaluator]: Inference done 995/2215. Dataloading: 0.2649 s/iter. Inference: 0.0891 s/iter. Eval: 0.0084 s/iter. Total: 0.3626 s/iter. ETA=0:07:22 [05/09 00:21:32 d2.evaluation.evaluator]: Inference done 1010/2215. Dataloading: 0.2648 s/iter. Inference: 0.0887 s/iter. Eval: 0.0084 s/iter. Total: 0.3623 s/iter. ETA=0:07:16 [05/09 00:21:38 d2.evaluation.evaluator]: Inference done 1026/2215. Dataloading: 0.2649 s/iter. Inference: 0.0884 s/iter. Eval: 0.0084 s/iter. Total: 0.3620 s/iter. ETA=0:07:10 [05/09 00:21:43 d2.evaluation.evaluator]: Inference done 1043/2215. Dataloading: 0.2644 s/iter. Inference: 0.0881 s/iter. Eval: 0.0084 s/iter. Total: 0.3610 s/iter. ETA=0:07:03 [05/09 00:21:48 d2.evaluation.evaluator]: Inference done 1059/2215. Dataloading: 0.2641 s/iter. Inference: 0.0878 s/iter. Eval: 0.0084 s/iter. Total: 0.3605 s/iter. ETA=0:06:56 [05/09 00:21:53 d2.evaluation.evaluator]: Inference done 1074/2215. Dataloading: 0.2641 s/iter. Inference: 0.0875 s/iter. Eval: 0.0084 s/iter. Total: 0.3603 s/iter. ETA=0:06:51 [05/09 00:21:59 d2.evaluation.evaluator]: Inference done 1090/2215. Dataloading: 0.2638 s/iter. Inference: 0.0872 s/iter. Eval: 0.0085 s/iter. Total: 0.3596 s/iter. 
ETA=0:06:44 [05/09 00:22:04 d2.evaluation.evaluator]: Inference done 1106/2215. Dataloading: 0.2637 s/iter. Inference: 0.0869 s/iter. Eval: 0.0084 s/iter. Total: 0.3593 s/iter. ETA=0:06:38 [05/09 00:22:09 d2.evaluation.evaluator]: Inference done 1120/2215. Dataloading: 0.2642 s/iter. Inference: 0.0867 s/iter. Eval: 0.0084 s/iter. Total: 0.3596 s/iter. ETA=0:06:33 [05/09 00:22:15 d2.evaluation.evaluator]: Inference done 1136/2215. Dataloading: 0.2640 s/iter. Inference: 0.0864 s/iter. Eval: 0.0084 s/iter. Total: 0.3591 s/iter. ETA=0:06:27 [05/09 00:22:20 d2.evaluation.evaluator]: Inference done 1150/2215. Dataloading: 0.2649 s/iter. Inference: 0.0862 s/iter. Eval: 0.0084 s/iter. Total: 0.3597 s/iter. ETA=0:06:23 [05/09 00:22:25 d2.evaluation.evaluator]: Inference done 1166/2215. Dataloading: 0.2646 s/iter. Inference: 0.0860 s/iter. Eval: 0.0084 s/iter. Total: 0.3592 s/iter. ETA=0:06:16 [05/09 00:22:31 d2.evaluation.evaluator]: Inference done 1182/2215. Dataloading: 0.2643 s/iter. Inference: 0.0857 s/iter. Eval: 0.0085 s/iter. Total: 0.3587 s/iter. ETA=0:06:10 [05/09 00:22:36 d2.evaluation.evaluator]: Inference done 1196/2215. Dataloading: 0.2645 s/iter. Inference: 0.0855 s/iter. Eval: 0.0085 s/iter. Total: 0.3587 s/iter. ETA=0:06:05 [05/09 00:22:41 d2.evaluation.evaluator]: Inference done 1211/2215. Dataloading: 0.2647 s/iter. Inference: 0.0853 s/iter. Eval: 0.0084 s/iter. Total: 0.3587 s/iter. ETA=0:06:00 [05/09 00:22:46 d2.evaluation.evaluator]: Inference done 1225/2215. Dataloading: 0.2650 s/iter. Inference: 0.0851 s/iter. Eval: 0.0084 s/iter. Total: 0.3587 s/iter. ETA=0:05:55 [05/09 00:22:52 d2.evaluation.evaluator]: Inference done 1240/2215. Dataloading: 0.2654 s/iter. Inference: 0.0849 s/iter. Eval: 0.0084 s/iter. Total: 0.3589 s/iter. ETA=0:05:49 [05/09 00:22:57 d2.evaluation.evaluator]: Inference done 1256/2215. Dataloading: 0.2652 s/iter. Inference: 0.0847 s/iter. Eval: 0.0084 s/iter. Total: 0.3585 s/iter. 
ETA=0:05:43 [05/09 00:23:02 d2.evaluation.evaluator]: Inference done 1270/2215. Dataloading: 0.2659 s/iter. Inference: 0.0845 s/iter. Eval: 0.0084 s/iter. Total: 0.3590 s/iter. ETA=0:05:39 [05/09 00:23:08 d2.evaluation.evaluator]: Inference done 1286/2215. Dataloading: 0.2656 s/iter. Inference: 0.0843 s/iter. Eval: 0.0084 s/iter. Total: 0.3585 s/iter. ETA=0:05:33 [05/09 00:23:13 d2.evaluation.evaluator]: Inference done 1300/2215. Dataloading: 0.2658 s/iter. Inference: 0.0841 s/iter. Eval: 0.0084 s/iter. Total: 0.3585 s/iter. ETA=0:05:28 [05/09 00:23:18 d2.evaluation.evaluator]: Inference done 1316/2215. Dataloading: 0.2654 s/iter. Inference: 0.0839 s/iter. Eval: 0.0084 s/iter. Total: 0.3580 s/iter. ETA=0:05:21 [05/09 00:23:23 d2.evaluation.evaluator]: Inference done 1332/2215. Dataloading: 0.2651 s/iter. Inference: 0.0838 s/iter. Eval: 0.0084 s/iter. Total: 0.3574 s/iter. ETA=0:05:15 [05/09 00:23:28 d2.evaluation.evaluator]: Inference done 1345/2215. Dataloading: 0.2660 s/iter. Inference: 0.0836 s/iter. Eval: 0.0083 s/iter. Total: 0.3581 s/iter. ETA=0:05:11 [05/09 00:23:33 d2.evaluation.evaluator]: Inference done 1361/2215. Dataloading: 0.2657 s/iter. Inference: 0.0834 s/iter. Eval: 0.0083 s/iter. Total: 0.3577 s/iter. ETA=0:05:05 [05/09 00:23:39 d2.evaluation.evaluator]: Inference done 1375/2215. Dataloading: 0.2660 s/iter. Inference: 0.0833 s/iter. Eval: 0.0084 s/iter. Total: 0.3578 s/iter. ETA=0:05:00 [05/09 00:23:44 d2.evaluation.evaluator]: Inference done 1389/2215. Dataloading: 0.2663 s/iter. Inference: 0.0831 s/iter. Eval: 0.0084 s/iter. Total: 0.3580 s/iter. ETA=0:04:55 [05/09 00:23:49 d2.evaluation.evaluator]: Inference done 1405/2215. Dataloading: 0.2661 s/iter. Inference: 0.0829 s/iter. Eval: 0.0084 s/iter. Total: 0.3576 s/iter. ETA=0:04:49 [05/09 00:23:55 d2.evaluation.evaluator]: Inference done 1419/2215. Dataloading: 0.2666 s/iter. Inference: 0.0828 s/iter. Eval: 0.0084 s/iter. Total: 0.3580 s/iter. 
ETA=0:04:44 [05/09 00:24:00 d2.evaluation.evaluator]: Inference done 1433/2215. Dataloading: 0.2668 s/iter. Inference: 0.0826 s/iter. Eval: 0.0084 s/iter. Total: 0.3580 s/iter. ETA=0:04:39 [05/09 00:24:05 d2.evaluation.evaluator]: Inference done 1448/2215. Dataloading: 0.2669 s/iter. Inference: 0.0824 s/iter. Eval: 0.0083 s/iter. Total: 0.3579 s/iter. ETA=0:04:34 [05/09 00:24:10 d2.evaluation.evaluator]: Inference done 1463/2215. Dataloading: 0.2670 s/iter. Inference: 0.0823 s/iter. Eval: 0.0083 s/iter. Total: 0.3579 s/iter. ETA=0:04:29 [05/09 00:24:16 d2.evaluation.evaluator]: Inference done 1478/2215. Dataloading: 0.2672 s/iter. Inference: 0.0821 s/iter. Eval: 0.0084 s/iter. Total: 0.3579 s/iter. ETA=0:04:23 [05/09 00:24:21 d2.evaluation.evaluator]: Inference done 1493/2215. Dataloading: 0.2671 s/iter. Inference: 0.0820 s/iter. Eval: 0.0084 s/iter. Total: 0.3577 s/iter. ETA=0:04:18 [05/09 00:24:26 d2.evaluation.evaluator]: Inference done 1506/2215. Dataloading: 0.2677 s/iter. Inference: 0.0819 s/iter. Eval: 0.0084 s/iter. Total: 0.3582 s/iter. ETA=0:04:13 [05/09 00:24:31 d2.evaluation.evaluator]: Inference done 1520/2215. Dataloading: 0.2679 s/iter. Inference: 0.0818 s/iter. Eval: 0.0084 s/iter. Total: 0.3582 s/iter. ETA=0:04:08 [05/09 00:24:36 d2.evaluation.evaluator]: Inference done 1536/2215. Dataloading: 0.2679 s/iter. Inference: 0.0816 s/iter. Eval: 0.0083 s/iter. Total: 0.3580 s/iter. ETA=0:04:03 [05/09 00:24:42 d2.evaluation.evaluator]: Inference done 1552/2215. Dataloading: 0.2677 s/iter. Inference: 0.0815 s/iter. Eval: 0.0083 s/iter. Total: 0.3578 s/iter. ETA=0:03:57 [05/09 00:24:47 d2.evaluation.evaluator]: Inference done 1568/2215. Dataloading: 0.2675 s/iter. Inference: 0.0813 s/iter. Eval: 0.0083 s/iter. Total: 0.3574 s/iter. ETA=0:03:51 [05/09 00:24:53 d2.evaluation.evaluator]: Inference done 1583/2215. Dataloading: 0.2677 s/iter. Inference: 0.0812 s/iter. Eval: 0.0083 s/iter. Total: 0.3575 s/iter. 
ETA=0:03:45 [05/09 00:24:58 d2.evaluation.evaluator]: Inference done 1597/2215. Dataloading: 0.2679 s/iter. Inference: 0.0811 s/iter. Eval: 0.0083 s/iter. Total: 0.3576 s/iter. ETA=0:03:40 [05/09 00:25:03 d2.evaluation.evaluator]: Inference done 1613/2215. Dataloading: 0.2678 s/iter. Inference: 0.0810 s/iter. Eval: 0.0083 s/iter. Total: 0.3573 s/iter. ETA=0:03:35 [05/09 00:25:08 d2.evaluation.evaluator]: Inference done 1628/2215. Dataloading: 0.2679 s/iter. Inference: 0.0809 s/iter. Eval: 0.0083 s/iter. Total: 0.3573 s/iter. ETA=0:03:29 [05/09 00:25:14 d2.evaluation.evaluator]: Inference done 1644/2215. Dataloading: 0.2677 s/iter. Inference: 0.0808 s/iter. Eval: 0.0083 s/iter. Total: 0.3570 s/iter. ETA=0:03:23 [05/09 00:25:19 d2.evaluation.evaluator]: Inference done 1660/2215. Dataloading: 0.2677 s/iter. Inference: 0.0806 s/iter. Eval: 0.0083 s/iter. Total: 0.3568 s/iter. ETA=0:03:18 [05/09 00:25:25 d2.evaluation.evaluator]: Inference done 1677/2215. Dataloading: 0.2677 s/iter. Inference: 0.0805 s/iter. Eval: 0.0082 s/iter. Total: 0.3567 s/iter. ETA=0:03:11 [05/09 00:25:30 d2.evaluation.evaluator]: Inference done 1692/2215. Dataloading: 0.2677 s/iter. Inference: 0.0804 s/iter. Eval: 0.0083 s/iter. Total: 0.3566 s/iter. ETA=0:03:06 [05/09 00:25:35 d2.evaluation.evaluator]: Inference done 1706/2215. Dataloading: 0.2681 s/iter. Inference: 0.0803 s/iter. Eval: 0.0083 s/iter. Total: 0.3569 s/iter. ETA=0:03:01 [05/09 00:25:40 d2.evaluation.evaluator]: Inference done 1720/2215. Dataloading: 0.2682 s/iter. Inference: 0.0802 s/iter. Eval: 0.0083 s/iter. Total: 0.3569 s/iter. ETA=0:02:56 [05/09 00:25:45 d2.evaluation.evaluator]: Inference done 1733/2215. Dataloading: 0.2685 s/iter. Inference: 0.0801 s/iter. Eval: 0.0083 s/iter. Total: 0.3571 s/iter. ETA=0:02:52 [05/09 00:25:51 d2.evaluation.evaluator]: Inference done 1748/2215. Dataloading: 0.2686 s/iter. Inference: 0.0800 s/iter. Eval: 0.0083 s/iter. Total: 0.3572 s/iter. 
ETA=0:02:46 [05/09 00:25:56 d2.evaluation.evaluator]: Inference done 1762/2215. Dataloading: 0.2688 s/iter. Inference: 0.0799 s/iter. Eval: 0.0083 s/iter. Total: 0.3572 s/iter. ETA=0:02:41 [05/09 00:26:01 d2.evaluation.evaluator]: Inference done 1774/2215. Dataloading: 0.2693 s/iter. Inference: 0.0798 s/iter. Eval: 0.0083 s/iter. Total: 0.3577 s/iter. ETA=0:02:37 [05/09 00:26:06 d2.evaluation.evaluator]: Inference done 1787/2215. Dataloading: 0.2696 s/iter. Inference: 0.0797 s/iter. Eval: 0.0083 s/iter. Total: 0.3579 s/iter. ETA=0:02:33 [05/09 00:26:12 d2.evaluation.evaluator]: Inference done 1804/2215. Dataloading: 0.2695 s/iter. Inference: 0.0796 s/iter. Eval: 0.0083 s/iter. Total: 0.3576 s/iter. ETA=0:02:26 [05/09 00:26:17 d2.evaluation.evaluator]: Inference done 1820/2215. Dataloading: 0.2694 s/iter. Inference: 0.0795 s/iter. Eval: 0.0083 s/iter. Total: 0.3574 s/iter. ETA=0:02:21 [05/09 00:26:23 d2.evaluation.evaluator]: Inference done 1836/2215. Dataloading: 0.2694 s/iter. Inference: 0.0794 s/iter. Eval: 0.0083 s/iter. Total: 0.3573 s/iter. ETA=0:02:15 [05/09 00:26:28 d2.evaluation.evaluator]: Inference done 1852/2215. Dataloading: 0.2691 s/iter. Inference: 0.0793 s/iter. Eval: 0.0083 s/iter. Total: 0.3569 s/iter. ETA=0:02:09 [05/09 00:26:33 d2.evaluation.evaluator]: Inference done 1867/2215. Dataloading: 0.2691 s/iter. Inference: 0.0792 s/iter. Eval: 0.0082 s/iter. Total: 0.3568 s/iter. ETA=0:02:04 [05/09 00:26:38 d2.evaluation.evaluator]: Inference done 1881/2215. Dataloading: 0.2693 s/iter. Inference: 0.0791 s/iter. Eval: 0.0082 s/iter. Total: 0.3569 s/iter. ETA=0:01:59 [05/09 00:26:43 d2.evaluation.evaluator]: Inference done 1893/2215. Dataloading: 0.2700 s/iter. Inference: 0.0791 s/iter. Eval: 0.0082 s/iter. Total: 0.3575 s/iter. ETA=0:01:55 [05/09 00:26:48 d2.evaluation.evaluator]: Inference done 1907/2215. Dataloading: 0.2701 s/iter. Inference: 0.0790 s/iter. Eval: 0.0083 s/iter. Total: 0.3575 s/iter. 
ETA=0:01:50 [05/09 00:26:54 d2.evaluation.evaluator]: Inference done 1921/2215. Dataloading: 0.2704 s/iter. Inference: 0.0789 s/iter. Eval: 0.0083 s/iter. Total: 0.3578 s/iter. ETA=0:01:45 [05/09 00:26:59 d2.evaluation.evaluator]: Inference done 1936/2215. Dataloading: 0.2705 s/iter. Inference: 0.0788 s/iter. Eval: 0.0082 s/iter. Total: 0.3578 s/iter. ETA=0:01:39 [05/09 00:27:05 d2.evaluation.evaluator]: Inference done 1951/2215. Dataloading: 0.2709 s/iter. Inference: 0.0787 s/iter. Eval: 0.0083 s/iter. Total: 0.3581 s/iter. ETA=0:01:34 [05/09 00:27:10 d2.evaluation.evaluator]: Inference done 1965/2215. Dataloading: 0.2710 s/iter. Inference: 0.0787 s/iter. Eval: 0.0083 s/iter. Total: 0.3582 s/iter. ETA=0:01:29 [05/09 00:27:16 d2.evaluation.evaluator]: Inference done 1981/2215. Dataloading: 0.2708 s/iter. Inference: 0.0786 s/iter. Eval: 0.0083 s/iter. Total: 0.3579 s/iter. ETA=0:01:23 [05/09 00:27:21 d2.evaluation.evaluator]: Inference done 1995/2215. Dataloading: 0.2709 s/iter. Inference: 0.0785 s/iter. Eval: 0.0083 s/iter. Total: 0.3579 s/iter. ETA=0:01:18 [05/09 00:27:27 d2.evaluation.evaluator]: Inference done 2011/2215. Dataloading: 0.2712 s/iter. Inference: 0.0784 s/iter. Eval: 0.0082 s/iter. Total: 0.3581 s/iter. ETA=0:01:13 [05/09 00:27:32 d2.evaluation.evaluator]: Inference done 2025/2215. Dataloading: 0.2713 s/iter. Inference: 0.0784 s/iter. Eval: 0.0082 s/iter. Total: 0.3581 s/iter. ETA=0:01:08 [05/09 00:27:37 d2.evaluation.evaluator]: Inference done 2039/2215. Dataloading: 0.2715 s/iter. Inference: 0.0783 s/iter. Eval: 0.0082 s/iter. Total: 0.3583 s/iter. ETA=0:01:03 [05/09 00:27:42 d2.evaluation.evaluator]: Inference done 2053/2215. Dataloading: 0.2717 s/iter. Inference: 0.0782 s/iter. Eval: 0.0082 s/iter. Total: 0.3583 s/iter. ETA=0:00:58 [05/09 00:27:48 d2.evaluation.evaluator]: Inference done 2069/2215. Dataloading: 0.2717 s/iter. Inference: 0.0781 s/iter. Eval: 0.0082 s/iter. Total: 0.3582 s/iter. 
ETA=0:00:52 [05/09 00:27:53 d2.evaluation.evaluator]: Inference done 2085/2215. Dataloading: 0.2716 s/iter. Inference: 0.0780 s/iter. Eval: 0.0082 s/iter. Total: 0.3581 s/iter. ETA=0:00:46 [05/09 00:27:58 d2.evaluation.evaluator]: Inference done 2100/2215. Dataloading: 0.2715 s/iter. Inference: 0.0780 s/iter. Eval: 0.0082 s/iter. Total: 0.3579 s/iter. ETA=0:00:41 [05/09 00:28:04 d2.evaluation.evaluator]: Inference done 2114/2215. Dataloading: 0.2722 s/iter. Inference: 0.0779 s/iter. Eval: 0.0082 s/iter. Total: 0.3585 s/iter. ETA=0:00:36 [05/09 00:28:10 d2.evaluation.evaluator]: Inference done 2130/2215. Dataloading: 0.2721 s/iter. Inference: 0.0778 s/iter. Eval: 0.0082 s/iter. Total: 0.3584 s/iter. ETA=0:00:30 [05/09 00:28:15 d2.evaluation.evaluator]: Inference done 2144/2215. Dataloading: 0.2724 s/iter. Inference: 0.0778 s/iter. Eval: 0.0081 s/iter. Total: 0.3585 s/iter. ETA=0:00:25 [05/09 00:28:20 d2.evaluation.evaluator]: Inference done 2158/2215. Dataloading: 0.2725 s/iter. Inference: 0.0777 s/iter. Eval: 0.0081 s/iter. Total: 0.3586 s/iter. ETA=0:00:20 [05/09 00:28:25 d2.evaluation.evaluator]: Inference done 2171/2215. Dataloading: 0.2728 s/iter. Inference: 0.0776 s/iter. Eval: 0.0081 s/iter. Total: 0.3588 s/iter. ETA=0:00:15 [05/09 00:28:31 d2.evaluation.evaluator]: Inference done 2186/2215. Dataloading: 0.2728 s/iter. Inference: 0.0776 s/iter. Eval: 0.0081 s/iter. Total: 0.3587 s/iter. ETA=0:00:10 [05/09 00:28:36 d2.evaluation.evaluator]: Inference done 2202/2215. Dataloading: 0.2727 s/iter. Inference: 0.0775 s/iter. Eval: 0.0082 s/iter. Total: 0.3586 s/iter. ETA=0:00:04 [05/09 00:28:41 d2.evaluation.evaluator]: Total inference time: 0:13:12.528540 (0.358610 s / iter per device, on 1 devices) [05/09 00:28:41 d2.evaluation.evaluator]: Total inference pure compute time: 0:02:51 (0.077470 s / iter per device, on 1 devices) [05/09 00:28:41 d2.evaluation.coco_evaluation]: Preparing results for COCO format ... 
[05/09 00:28:41 d2.evaluation.coco_evaluation]: Saving results to ./output_evals/coco_instances_results.json [05/09 00:28:42 d2.evaluation.coco_evaluation]: Evaluating predictions with unofficial COCO API... Loading and preparing results... DONE (t=0.03s) creating index... index created! [05/09 00:28:42 d2.evaluation.fast_eval_api]: Evaluate annotation type *bbox* [05/09 00:28:42 d2.evaluation.fast_eval_api]: COCOeval_opt.evaluate() finished in 0.73 seconds. [05/09 00:28:42 d2.evaluation.fast_eval_api]: Accumulating evaluation results... [05/09 00:28:42 d2.evaluation.fast_eval_api]: COCOeval_opt.accumulate() finished in 0.12 seconds. Average Precision (AP) @[ IoU=0.50:0.95 | area= all | maxDets=100 ] = 0.121 Average Precision (AP) @[ IoU=0.50 | area= all | maxDets=100 ] = 0.192 Average Precision (AP) @[ IoU=0.75 | area= all | maxDets=100 ] = 0.134 Average Precision (AP) @[ IoU=0.50:0.95 | area= small | maxDets=100 ] = 0.500 Average Precision (AP) @[ IoU=0.50:0.95 | area=medium | maxDets=100 ] = 0.170 Average Precision (AP) @[ IoU=0.50:0.95 | area= large | maxDets=100 ] = 0.120 Average Recall (AR) @[ IoU=0.50:0.95 | area= all | maxDets= 1 ] = 0.121 Average Recall (AR) @[ IoU=0.50:0.95 | area= all | maxDets= 10 ] = 0.214 Average Recall (AR) @[ IoU=0.50:0.95 | area= all | maxDets=100 ] = 0.215 Average Recall (AR) @[ IoU=0.50:0.95 | area= small | maxDets=100 ] = 0.500 Average Recall (AR) @[ IoU=0.50:0.95 | area=medium | maxDets=100 ] = 0.293 Average Recall (AR) @[ IoU=0.50:0.95 | area= large | maxDets=100 ] = 0.215 [05/09 00:28:42 d2.evaluation.coco_evaluation]: Evaluation results for bbox: | AP | AP50 | AP75 | APs | APm | APl | |:------:|:------:|:------:|:------:|:------:|:------:| | 12.055 | 19.171 | 13.449 | 50.000 | 16.957 | 11.992 | [05/09 00:28:42 d2.evaluation.coco_evaluation]: Per-category bbox AP: | category | AP | category | AP | category | AP | |:--------------|:-------|:-----------|:------|:-----------|:-------| | hold | 40.045 | carry | 0.000 | point | 
0.000 | | eat | 1.653 | drink | 0.000 | stand | 42.685 | | talk_on_phone | 0.000 | | | | | Loading and preparing results... DONE (t=0.34s) creating index... index created! [05/09 00:28:44 d2.evaluation.fast_eval_api]: Evaluate annotation type *segm* [05/09 00:28:45 d2.evaluation.fast_eval_api]: COCOeval_opt.evaluate() finished in 1.38 seconds. [05/09 00:28:45 d2.evaluation.fast_eval_api]: Accumulating evaluation results... [05/09 00:28:45 d2.evaluation.fast_eval_api]: COCOeval_opt.accumulate() finished in 0.12 seconds. Average Precision (AP) @[ IoU=0.50:0.95 | area= all | maxDets=100 ] = 0.112 Average Precision (AP) @[ IoU=0.50 | area= all | maxDets=100 ] = 0.186 Average Precision (AP) @[ IoU=0.75 | area= all | maxDets=100 ] = 0.126 Average Precision (AP) @[ IoU=0.50:0.95 | area= small | maxDets=100 ] = 0.033 Average Precision (AP) @[ IoU=0.50:0.95 | area=medium | maxDets=100 ] = 0.120 Average Precision (AP) @[ IoU=0.50:0.95 | area= large | maxDets=100 ] = 0.125 Average Recall (AR) @[ IoU=0.50:0.95 | area= all | maxDets= 1 ] = 0.118 Average Recall (AR) @[ IoU=0.50:0.95 | area= all | maxDets= 10 ] = 0.209 Average Recall (AR) @[ IoU=0.50:0.95 | area= all | maxDets=100 ] = 0.210 Average Recall (AR) @[ IoU=0.50:0.95 | area= small | maxDets=100 ] = 0.400 Average Recall (AR) @[ IoU=0.50:0.95 | area=medium | maxDets=100 ] = 0.282 Average Recall (AR) @[ IoU=0.50:0.95 | area= large | maxDets=100 ] = 0.212 [05/09 00:28:45 d2.evaluation.coco_evaluation]: Evaluation results for segm: | AP | AP50 | AP75 | APs | APm | APl | |:------:|:------:|:------:|:-----:|:------:|:------:| | 11.216 | 18.607 | 12.625 | 3.333 | 12.010 | 12.547 | [05/09 00:28:45 d2.evaluation.coco_evaluation]: Per-category segm AP: | category | AP | category | AP | category | AP | |:--------------|:-------|:-----------|:------|:-----------|:-------| | hold | 34.787 | carry | 0.000 | point | 0.000 | | eat | 2.034 | drink | 0.000 | stand | 41.690 | | talk_on_phone | 0.000 | | | | |
OrderedDict([('bbox',
{'AP': 12.054746090751948,
'AP-carry': 0.0,
'AP-drink': 0.0,
'AP-eat': 1.653387355812205,
'AP-hold': 40.04496914199544,
'AP-point': 0.0,
'AP-stand': 42.684866137455984,
'AP-talk_on_phone': 0.0,
'AP50': 19.171166864176435,
'AP75': 13.44908209568981,
'APl': 11.992233646442443,
'APm': 16.956725039905564,
'APs': 50.0}),
('segm',
{'AP': 11.215705604418499,
'AP-carry': 0.0,
'AP-drink': 0.0,
'AP-eat': 2.033828393624494,
'AP-hold': 34.786599491454645,
'AP-point': 0.0,
'AP-stand': 41.68951134585036,
'AP-talk_on_phone': 0.0,
'AP50': 18.607357993512416,
'AP75': 12.624930670282072,
'APl': 12.54704110216769,
'APm': 12.010120116297184,
'APs': 3.3333333333333326})])
# Switch the config from training to inference: load the final trained
# checkpoint and build a single-image predictor from it.
cfg.MODEL.WEIGHTS = os.path.join(cfg.OUTPUT_DIR, "model_final.pth")
cfg.MODEL.ROI_HEADS.SCORE_THRESH_TEST = 0.4  # set the testing threshold for this model
# Point the test set at the validation split registered elsewhere in the
# notebook (the name suggests v-coco action annotations — confirm registration).
cfg.DATASETS.TEST = ("rel_actions_val3", )
predictor = DefaultPredictor(cfg)
[05/09 00:31:05 d2.checkpoint.c2_model_loading]: Following weights matched with model:
| Names in Model | Names in Checkpoint | Shapes |
|:------------------------------------------------|:-----------------------------------------------------------------------------------------------------|:------------------------------------------------|
| backbone.bottom_up.res2.0.conv1.* | backbone.bottom_up.res2.0.conv1.{norm.bias,norm.running_mean,norm.running_var,norm.weight,weight} | (64,) (64,) (64,) (64,) (64,64,1,1) |
| backbone.bottom_up.res2.0.conv2.* | backbone.bottom_up.res2.0.conv2.{norm.bias,norm.running_mean,norm.running_var,norm.weight,weight} | (64,) (64,) (64,) (64,) (64,64,3,3) |
| backbone.bottom_up.res2.0.conv3.* | backbone.bottom_up.res2.0.conv3.{norm.bias,norm.running_mean,norm.running_var,norm.weight,weight} | (256,) (256,) (256,) (256,) (256,64,1,1) |
| backbone.bottom_up.res2.0.shortcut.* | backbone.bottom_up.res2.0.shortcut.{norm.bias,norm.running_mean,norm.running_var,norm.weight,weight} | (256,) (256,) (256,) (256,) (256,64,1,1) |
| backbone.bottom_up.res2.1.conv1.* | backbone.bottom_up.res2.1.conv1.{norm.bias,norm.running_mean,norm.running_var,norm.weight,weight} | (64,) (64,) (64,) (64,) (64,256,1,1) |
| backbone.bottom_up.res2.1.conv2.* | backbone.bottom_up.res2.1.conv2.{norm.bias,norm.running_mean,norm.running_var,norm.weight,weight} | (64,) (64,) (64,) (64,) (64,64,3,3) |
| backbone.bottom_up.res2.1.conv3.* | backbone.bottom_up.res2.1.conv3.{norm.bias,norm.running_mean,norm.running_var,norm.weight,weight} | (256,) (256,) (256,) (256,) (256,64,1,1) |
| backbone.bottom_up.res2.2.conv1.* | backbone.bottom_up.res2.2.conv1.{norm.bias,norm.running_mean,norm.running_var,norm.weight,weight} | (64,) (64,) (64,) (64,) (64,256,1,1) |
| backbone.bottom_up.res2.2.conv2.* | backbone.bottom_up.res2.2.conv2.{norm.bias,norm.running_mean,norm.running_var,norm.weight,weight} | (64,) (64,) (64,) (64,) (64,64,3,3) |
| backbone.bottom_up.res2.2.conv3.* | backbone.bottom_up.res2.2.conv3.{norm.bias,norm.running_mean,norm.running_var,norm.weight,weight} | (256,) (256,) (256,) (256,) (256,64,1,1) |
| backbone.bottom_up.res3.0.conv1.* | backbone.bottom_up.res3.0.conv1.{norm.bias,norm.running_mean,norm.running_var,norm.weight,weight} | (128,) (128,) (128,) (128,) (128,256,1,1) |
| backbone.bottom_up.res3.0.conv2.* | backbone.bottom_up.res3.0.conv2.{norm.bias,norm.running_mean,norm.running_var,norm.weight,weight} | (128,) (128,) (128,) (128,) (128,128,3,3) |
| backbone.bottom_up.res3.0.conv3.* | backbone.bottom_up.res3.0.conv3.{norm.bias,norm.running_mean,norm.running_var,norm.weight,weight} | (512,) (512,) (512,) (512,) (512,128,1,1) |
| backbone.bottom_up.res3.0.shortcut.* | backbone.bottom_up.res3.0.shortcut.{norm.bias,norm.running_mean,norm.running_var,norm.weight,weight} | (512,) (512,) (512,) (512,) (512,256,1,1) |
| backbone.bottom_up.res3.1.conv1.* | backbone.bottom_up.res3.1.conv1.{norm.bias,norm.running_mean,norm.running_var,norm.weight,weight} | (128,) (128,) (128,) (128,) (128,512,1,1) |
| backbone.bottom_up.res3.1.conv2.* | backbone.bottom_up.res3.1.conv2.{norm.bias,norm.running_mean,norm.running_var,norm.weight,weight} | (128,) (128,) (128,) (128,) (128,128,3,3) |
| backbone.bottom_up.res3.1.conv3.* | backbone.bottom_up.res3.1.conv3.{norm.bias,norm.running_mean,norm.running_var,norm.weight,weight} | (512,) (512,) (512,) (512,) (512,128,1,1) |
| backbone.bottom_up.res3.2.conv1.* | backbone.bottom_up.res3.2.conv1.{norm.bias,norm.running_mean,norm.running_var,norm.weight,weight} | (128,) (128,) (128,) (128,) (128,512,1,1) |
| backbone.bottom_up.res3.2.conv2.* | backbone.bottom_up.res3.2.conv2.{norm.bias,norm.running_mean,norm.running_var,norm.weight,weight} | (128,) (128,) (128,) (128,) (128,128,3,3) |
| backbone.bottom_up.res3.2.conv3.* | backbone.bottom_up.res3.2.conv3.{norm.bias,norm.running_mean,norm.running_var,norm.weight,weight} | (512,) (512,) (512,) (512,) (512,128,1,1) |
| backbone.bottom_up.res3.3.conv1.* | backbone.bottom_up.res3.3.conv1.{norm.bias,norm.running_mean,norm.running_var,norm.weight,weight} | (128,) (128,) (128,) (128,) (128,512,1,1) |
| backbone.bottom_up.res3.3.conv2.* | backbone.bottom_up.res3.3.conv2.{norm.bias,norm.running_mean,norm.running_var,norm.weight,weight} | (128,) (128,) (128,) (128,) (128,128,3,3) |
| backbone.bottom_up.res3.3.conv3.* | backbone.bottom_up.res3.3.conv3.{norm.bias,norm.running_mean,norm.running_var,norm.weight,weight} | (512,) (512,) (512,) (512,) (512,128,1,1) |
| backbone.bottom_up.res4.0.conv1.* | backbone.bottom_up.res4.0.conv1.{norm.bias,norm.running_mean,norm.running_var,norm.weight,weight} | (256,) (256,) (256,) (256,) (256,512,1,1) |
| backbone.bottom_up.res4.0.conv2.* | backbone.bottom_up.res4.0.conv2.{norm.bias,norm.running_mean,norm.running_var,norm.weight,weight} | (256,) (256,) (256,) (256,) (256,256,3,3) |
| backbone.bottom_up.res4.0.conv3.* | backbone.bottom_up.res4.0.conv3.{norm.bias,norm.running_mean,norm.running_var,norm.weight,weight} | (1024,) (1024,) (1024,) (1024,) (1024,256,1,1) |
| backbone.bottom_up.res4.0.shortcut.* | backbone.bottom_up.res4.0.shortcut.{norm.bias,norm.running_mean,norm.running_var,norm.weight,weight} | (1024,) (1024,) (1024,) (1024,) (1024,512,1,1) |
| backbone.bottom_up.res4.1.conv1.* | backbone.bottom_up.res4.1.conv1.{norm.bias,norm.running_mean,norm.running_var,norm.weight,weight} | (256,) (256,) (256,) (256,) (256,1024,1,1) |
| backbone.bottom_up.res4.1.conv2.* | backbone.bottom_up.res4.1.conv2.{norm.bias,norm.running_mean,norm.running_var,norm.weight,weight} | (256,) (256,) (256,) (256,) (256,256,3,3) |
| backbone.bottom_up.res4.1.conv3.* | backbone.bottom_up.res4.1.conv3.{norm.bias,norm.running_mean,norm.running_var,norm.weight,weight} | (1024,) (1024,) (1024,) (1024,) (1024,256,1,1) |
| backbone.bottom_up.res4.2.conv1.* | backbone.bottom_up.res4.2.conv1.{norm.bias,norm.running_mean,norm.running_var,norm.weight,weight} | (256,) (256,) (256,) (256,) (256,1024,1,1) |
| backbone.bottom_up.res4.2.conv2.* | backbone.bottom_up.res4.2.conv2.{norm.bias,norm.running_mean,norm.running_var,norm.weight,weight} | (256,) (256,) (256,) (256,) (256,256,3,3) |
| backbone.bottom_up.res4.2.conv3.* | backbone.bottom_up.res4.2.conv3.{norm.bias,norm.running_mean,norm.running_var,norm.weight,weight} | (1024,) (1024,) (1024,) (1024,) (1024,256,1,1) |
| backbone.bottom_up.res4.3.conv1.* | backbone.bottom_up.res4.3.conv1.{norm.bias,norm.running_mean,norm.running_var,norm.weight,weight} | (256,) (256,) (256,) (256,) (256,1024,1,1) |
| backbone.bottom_up.res4.3.conv2.* | backbone.bottom_up.res4.3.conv2.{norm.bias,norm.running_mean,norm.running_var,norm.weight,weight} | (256,) (256,) (256,) (256,) (256,256,3,3) |
| backbone.bottom_up.res4.3.conv3.* | backbone.bottom_up.res4.3.conv3.{norm.bias,norm.running_mean,norm.running_var,norm.weight,weight} | (1024,) (1024,) (1024,) (1024,) (1024,256,1,1) |
| backbone.bottom_up.res4.4.conv1.* | backbone.bottom_up.res4.4.conv1.{norm.bias,norm.running_mean,norm.running_var,norm.weight,weight} | (256,) (256,) (256,) (256,) (256,1024,1,1) |
| backbone.bottom_up.res4.4.conv2.* | backbone.bottom_up.res4.4.conv2.{norm.bias,norm.running_mean,norm.running_var,norm.weight,weight} | (256,) (256,) (256,) (256,) (256,256,3,3) |
| backbone.bottom_up.res4.4.conv3.* | backbone.bottom_up.res4.4.conv3.{norm.bias,norm.running_mean,norm.running_var,norm.weight,weight} | (1024,) (1024,) (1024,) (1024,) (1024,256,1,1) |
| backbone.bottom_up.res4.5.conv1.* | backbone.bottom_up.res4.5.conv1.{norm.bias,norm.running_mean,norm.running_var,norm.weight,weight} | (256,) (256,) (256,) (256,) (256,1024,1,1) |
| backbone.bottom_up.res4.5.conv2.* | backbone.bottom_up.res4.5.conv2.{norm.bias,norm.running_mean,norm.running_var,norm.weight,weight} | (256,) (256,) (256,) (256,) (256,256,3,3) |
| backbone.bottom_up.res4.5.conv3.* | backbone.bottom_up.res4.5.conv3.{norm.bias,norm.running_mean,norm.running_var,norm.weight,weight} | (1024,) (1024,) (1024,) (1024,) (1024,256,1,1) |
| backbone.bottom_up.res5.0.conv1.* | backbone.bottom_up.res5.0.conv1.{norm.bias,norm.running_mean,norm.running_var,norm.weight,weight} | (512,) (512,) (512,) (512,) (512,1024,1,1) |
| backbone.bottom_up.res5.0.conv2.* | backbone.bottom_up.res5.0.conv2.{norm.bias,norm.running_mean,norm.running_var,norm.weight,weight} | (512,) (512,) (512,) (512,) (512,512,3,3) |
| backbone.bottom_up.res5.0.conv3.* | backbone.bottom_up.res5.0.conv3.{norm.bias,norm.running_mean,norm.running_var,norm.weight,weight} | (2048,) (2048,) (2048,) (2048,) (2048,512,1,1) |
| backbone.bottom_up.res5.0.shortcut.* | backbone.bottom_up.res5.0.shortcut.{norm.bias,norm.running_mean,norm.running_var,norm.weight,weight} | (2048,) (2048,) (2048,) (2048,) (2048,1024,1,1) |
| backbone.bottom_up.res5.1.conv1.* | backbone.bottom_up.res5.1.conv1.{norm.bias,norm.running_mean,norm.running_var,norm.weight,weight} | (512,) (512,) (512,) (512,) (512,2048,1,1) |
| backbone.bottom_up.res5.1.conv2.* | backbone.bottom_up.res5.1.conv2.{norm.bias,norm.running_mean,norm.running_var,norm.weight,weight} | (512,) (512,) (512,) (512,) (512,512,3,3) |
| backbone.bottom_up.res5.1.conv3.* | backbone.bottom_up.res5.1.conv3.{norm.bias,norm.running_mean,norm.running_var,norm.weight,weight} | (2048,) (2048,) (2048,) (2048,) (2048,512,1,1) |
| backbone.bottom_up.res5.2.conv1.* | backbone.bottom_up.res5.2.conv1.{norm.bias,norm.running_mean,norm.running_var,norm.weight,weight} | (512,) (512,) (512,) (512,) (512,2048,1,1) |
| backbone.bottom_up.res5.2.conv2.* | backbone.bottom_up.res5.2.conv2.{norm.bias,norm.running_mean,norm.running_var,norm.weight,weight} | (512,) (512,) (512,) (512,) (512,512,3,3) |
| backbone.bottom_up.res5.2.conv3.* | backbone.bottom_up.res5.2.conv3.{norm.bias,norm.running_mean,norm.running_var,norm.weight,weight} | (2048,) (2048,) (2048,) (2048,) (2048,512,1,1) |
| backbone.bottom_up.stem.conv1.* | backbone.bottom_up.stem.conv1.{norm.bias,norm.running_mean,norm.running_var,norm.weight,weight} | (64,) (64,) (64,) (64,) (64,3,7,7) |
| backbone.fpn_lateral2.* | backbone.fpn_lateral2.{bias,weight} | (256,) (256,256,1,1) |
| backbone.fpn_lateral3.* | backbone.fpn_lateral3.{bias,weight} | (256,) (256,512,1,1) |
| backbone.fpn_lateral4.* | backbone.fpn_lateral4.{bias,weight} | (256,) (256,1024,1,1) |
| backbone.fpn_lateral5.* | backbone.fpn_lateral5.{bias,weight} | (256,) (256,2048,1,1) |
| backbone.fpn_output2.* | backbone.fpn_output2.{bias,weight} | (256,) (256,256,3,3) |
| backbone.fpn_output3.* | backbone.fpn_output3.{bias,weight} | (256,) (256,256,3,3) |
| backbone.fpn_output4.* | backbone.fpn_output4.{bias,weight} | (256,) (256,256,3,3) |
| backbone.fpn_output5.* | backbone.fpn_output5.{bias,weight} | (256,) (256,256,3,3) |
| proposal_generator.rpn_head.anchor_deltas.* | proposal_generator.rpn_head.anchor_deltas.{bias,weight} | (12,) (12,256,1,1) |
| proposal_generator.rpn_head.conv.* | proposal_generator.rpn_head.conv.{bias,weight} | (256,) (256,256,3,3) |
| proposal_generator.rpn_head.objectness_logits.* | proposal_generator.rpn_head.objectness_logits.{bias,weight} | (3,) (3,256,1,1) |
| roi_heads.box_head.fc1.* | roi_heads.box_head.fc1.{bias,weight} | (1024,) (1024,12544) |
| roi_heads.box_head.fc2.* | roi_heads.box_head.fc2.{bias,weight} | (1024,) (1024,1024) |
| roi_heads.box_predictor.bbox_pred.* | roi_heads.box_predictor.bbox_pred.{bias,weight} | (28,) (28,1024) |
| roi_heads.box_predictor.cls_score.* | roi_heads.box_predictor.cls_score.{bias,weight} | (8,) (8,1024) |
| roi_heads.mask_head.deconv.* | roi_heads.mask_head.deconv.{bias,weight} | (256,) (256,256,2,2) |
| roi_heads.mask_head.mask_fcn1.* | roi_heads.mask_head.mask_fcn1.{bias,weight} | (256,) (256,256,3,3) |
| roi_heads.mask_head.mask_fcn2.* | roi_heads.mask_head.mask_fcn2.{bias,weight} | (256,) (256,256,3,3) |
| roi_heads.mask_head.mask_fcn3.* | roi_heads.mask_head.mask_fcn3.{bias,weight} | (256,) (256,256,3,3) |
| roi_heads.mask_head.mask_fcn4.* | roi_heads.mask_head.mask_fcn4.{bias,weight} | (256,) (256,256,3,3) |
| roi_heads.mask_head.predictor.* | roi_heads.mask_head.predictor.{bias,weight} | (7,) (7,256,1,1) |
from detectron2.evaluation import COCOEvaluator, inference_on_dataset
from detectron2.data import build_detection_test_loader
from detectron2.engine import DefaultTrainer

# Run COCO-style (bbox + segm) evaluation of the trained model on the
# validation split, writing JSON results under ./output_evals/.
#
# Fix: construct COCOEvaluator with explicit arguments. The previous
# (dataset_name, cfg, distributed, output_dir) form triggers detectron2's
# deprecation warning ("COCO Evaluator instantiated using config, this is
# deprecated behavior. Please pass in explicit arguments instead.") and is
# not needed — the evaluator infers tasks from the dataset/model outputs.
evaluator = COCOEvaluator("rel_actions_val3", output_dir="./output_evals/")
val_loader = build_detection_test_loader(cfg, "rel_actions_val3")
# NOTE(review): this evaluates trainer.model, not the DefaultPredictor built
# earlier from model_final.pth. Both should hold the same weights here, but
# SCORE_THRESH_TEST only affects the predictor — confirm which path the
# reported numbers are meant to come from.
inference_on_dataset(trainer.model, val_loader, evaluator)
WARNING [05/09 00:32:15 d2.evaluation.coco_evaluation]: COCO Evaluator instantiated using config, this is deprecated behavior. Please pass in explicit arguments instead. [05/09 00:32:15 d2.data.datasets.coco]: Loaded 2215 images in COCO format from ./data/instances_vcoco_actions_val_2014.json [05/09 00:32:15 d2.data.dataset_mapper]: [DatasetMapper] Augmentations used in inference: [ResizeShortestEdge(short_edge_length=(800, 800), max_size=1333, sample_style='choice')] [05/09 00:32:15 d2.data.common]: Serializing 2215 elements to byte tensors and concatenating them all ... [05/09 00:32:15 d2.data.common]: Serialized dataset takes 5.66 MiB [05/09 00:32:15 d2.evaluation.evaluator]: Start inference on 2215 batches [05/09 00:32:16 d2.evaluation.evaluator]: Inference done 11/2215. Dataloading: 0.0017 s/iter. Inference: 0.0679 s/iter. Eval: 0.0063 s/iter. Total: 0.0759 s/iter. ETA=0:02:47 [05/09 00:32:21 d2.evaluation.evaluator]: Inference done 73/2215. Dataloading: 0.0022 s/iter. Inference: 0.0702 s/iter. Eval: 0.0078 s/iter. Total: 0.0804 s/iter. ETA=0:02:52 [05/09 00:32:27 d2.evaluation.evaluator]: Inference done 135/2215. Dataloading: 0.0022 s/iter. Inference: 0.0698 s/iter. Eval: 0.0086 s/iter. Total: 0.0808 s/iter. ETA=0:02:48 [05/09 00:32:32 d2.evaluation.evaluator]: Inference done 197/2215. Dataloading: 0.0022 s/iter. Inference: 0.0700 s/iter. Eval: 0.0086 s/iter. Total: 0.0809 s/iter. ETA=0:02:43 [05/09 00:32:37 d2.evaluation.evaluator]: Inference done 260/2215. Dataloading: 0.0022 s/iter. Inference: 0.0700 s/iter. Eval: 0.0084 s/iter. Total: 0.0807 s/iter. ETA=0:02:37 [05/09 00:32:42 d2.evaluation.evaluator]: Inference done 322/2215. Dataloading: 0.0022 s/iter. Inference: 0.0700 s/iter. Eval: 0.0084 s/iter. Total: 0.0808 s/iter. ETA=0:02:32 [05/09 00:32:47 d2.evaluation.evaluator]: Inference done 385/2215. Dataloading: 0.0022 s/iter. Inference: 0.0701 s/iter. Eval: 0.0083 s/iter. Total: 0.0807 s/iter. 
ETA=0:02:27 [05/09 00:32:52 d2.evaluation.evaluator]: Inference done 448/2215. Dataloading: 0.0023 s/iter. Inference: 0.0702 s/iter. Eval: 0.0081 s/iter. Total: 0.0806 s/iter. ETA=0:02:22 [05/09 00:32:57 d2.evaluation.evaluator]: Inference done 511/2215. Dataloading: 0.0023 s/iter. Inference: 0.0700 s/iter. Eval: 0.0082 s/iter. Total: 0.0806 s/iter. ETA=0:02:17 [05/09 00:33:02 d2.evaluation.evaluator]: Inference done 574/2215. Dataloading: 0.0023 s/iter. Inference: 0.0701 s/iter. Eval: 0.0081 s/iter. Total: 0.0806 s/iter. ETA=0:02:12 [05/09 00:33:07 d2.evaluation.evaluator]: Inference done 636/2215. Dataloading: 0.0023 s/iter. Inference: 0.0700 s/iter. Eval: 0.0082 s/iter. Total: 0.0806 s/iter. ETA=0:02:07 [05/09 00:33:12 d2.evaluation.evaluator]: Inference done 697/2215. Dataloading: 0.0023 s/iter. Inference: 0.0701 s/iter. Eval: 0.0082 s/iter. Total: 0.0808 s/iter. ETA=0:02:02 [05/09 00:33:17 d2.evaluation.evaluator]: Inference done 758/2215. Dataloading: 0.0023 s/iter. Inference: 0.0702 s/iter. Eval: 0.0083 s/iter. Total: 0.0809 s/iter. ETA=0:01:57 [05/09 00:33:22 d2.evaluation.evaluator]: Inference done 820/2215. Dataloading: 0.0023 s/iter. Inference: 0.0702 s/iter. Eval: 0.0083 s/iter. Total: 0.0810 s/iter. ETA=0:01:52 [05/09 00:33:27 d2.evaluation.evaluator]: Inference done 884/2215. Dataloading: 0.0023 s/iter. Inference: 0.0701 s/iter. Eval: 0.0082 s/iter. Total: 0.0808 s/iter. ETA=0:01:47 [05/09 00:33:32 d2.evaluation.evaluator]: Inference done 947/2215. Dataloading: 0.0023 s/iter. Inference: 0.0701 s/iter. Eval: 0.0082 s/iter. Total: 0.0808 s/iter. ETA=0:01:42 [05/09 00:33:37 d2.evaluation.evaluator]: Inference done 1009/2215. Dataloading: 0.0023 s/iter. Inference: 0.0702 s/iter. Eval: 0.0082 s/iter. Total: 0.0808 s/iter. ETA=0:01:37 [05/09 00:33:42 d2.evaluation.evaluator]: Inference done 1072/2215. Dataloading: 0.0023 s/iter. Inference: 0.0702 s/iter. Eval: 0.0082 s/iter. Total: 0.0808 s/iter. 
ETA=0:01:32 [05/09 00:33:47 d2.evaluation.evaluator]: Inference done 1134/2215. Dataloading: 0.0023 s/iter. Inference: 0.0702 s/iter. Eval: 0.0082 s/iter. Total: 0.0808 s/iter. ETA=0:01:27 [05/09 00:33:52 d2.evaluation.evaluator]: Inference done 1196/2215. Dataloading: 0.0023 s/iter. Inference: 0.0702 s/iter. Eval: 0.0082 s/iter. Total: 0.0808 s/iter. ETA=0:01:22 [05/09 00:33:57 d2.evaluation.evaluator]: Inference done 1259/2215. Dataloading: 0.0023 s/iter. Inference: 0.0702 s/iter. Eval: 0.0081 s/iter. Total: 0.0807 s/iter. ETA=0:01:17 [05/09 00:34:02 d2.evaluation.evaluator]: Inference done 1322/2215. Dataloading: 0.0023 s/iter. Inference: 0.0702 s/iter. Eval: 0.0081 s/iter. Total: 0.0807 s/iter. ETA=0:01:12 [05/09 00:34:07 d2.evaluation.evaluator]: Inference done 1384/2215. Dataloading: 0.0023 s/iter. Inference: 0.0702 s/iter. Eval: 0.0081 s/iter. Total: 0.0807 s/iter. ETA=0:01:07 [05/09 00:34:12 d2.evaluation.evaluator]: Inference done 1448/2215. Dataloading: 0.0023 s/iter. Inference: 0.0701 s/iter. Eval: 0.0081 s/iter. Total: 0.0806 s/iter. ETA=0:01:01 [05/09 00:34:17 d2.evaluation.evaluator]: Inference done 1510/2215. Dataloading: 0.0023 s/iter. Inference: 0.0701 s/iter. Eval: 0.0081 s/iter. Total: 0.0806 s/iter. ETA=0:00:56 [05/09 00:34:22 d2.evaluation.evaluator]: Inference done 1575/2215. Dataloading: 0.0023 s/iter. Inference: 0.0701 s/iter. Eval: 0.0080 s/iter. Total: 0.0805 s/iter. ETA=0:00:51 [05/09 00:34:27 d2.evaluation.evaluator]: Inference done 1637/2215. Dataloading: 0.0023 s/iter. Inference: 0.0702 s/iter. Eval: 0.0080 s/iter. Total: 0.0805 s/iter. ETA=0:00:46 [05/09 00:34:33 d2.evaluation.evaluator]: Inference done 1699/2215. Dataloading: 0.0023 s/iter. Inference: 0.0702 s/iter. Eval: 0.0080 s/iter. Total: 0.0806 s/iter. ETA=0:00:41 [05/09 00:34:38 d2.evaluation.evaluator]: Inference done 1762/2215. Dataloading: 0.0023 s/iter. Inference: 0.0702 s/iter. Eval: 0.0080 s/iter. Total: 0.0806 s/iter. 
ETA=0:00:36 [05/09 00:34:43 d2.evaluation.evaluator]: Inference done 1825/2215. Dataloading: 0.0023 s/iter. Inference: 0.0702 s/iter. Eval: 0.0080 s/iter. Total: 0.0806 s/iter. ETA=0:00:31 [05/09 00:34:48 d2.evaluation.evaluator]: Inference done 1889/2215. Dataloading: 0.0023 s/iter. Inference: 0.0701 s/iter. Eval: 0.0079 s/iter. Total: 0.0805 s/iter. ETA=0:00:26 [05/09 00:34:53 d2.evaluation.evaluator]: Inference done 1949/2215. Dataloading: 0.0023 s/iter. Inference: 0.0702 s/iter. Eval: 0.0080 s/iter. Total: 0.0806 s/iter. ETA=0:00:21 [05/09 00:34:58 d2.evaluation.evaluator]: Inference done 2011/2215. Dataloading: 0.0023 s/iter. Inference: 0.0702 s/iter. Eval: 0.0080 s/iter. Total: 0.0806 s/iter. ETA=0:00:16 [05/09 00:35:03 d2.evaluation.evaluator]: Inference done 2075/2215. Dataloading: 0.0023 s/iter. Inference: 0.0702 s/iter. Eval: 0.0079 s/iter. Total: 0.0805 s/iter. ETA=0:00:11 [05/09 00:35:08 d2.evaluation.evaluator]: Inference done 2138/2215. Dataloading: 0.0023 s/iter. Inference: 0.0702 s/iter. Eval: 0.0079 s/iter. Total: 0.0805 s/iter. ETA=0:00:06 [05/09 00:35:13 d2.evaluation.evaluator]: Inference done 2200/2215. Dataloading: 0.0023 s/iter. Inference: 0.0702 s/iter. Eval: 0.0079 s/iter. Total: 0.0805 s/iter. ETA=0:00:01 [05/09 00:35:14 d2.evaluation.evaluator]: Total inference time: 0:02:58.067442 (0.080574 s / iter per device, on 1 devices) [05/09 00:35:14 d2.evaluation.evaluator]: Total inference pure compute time: 0:02:35 (0.070239 s / iter per device, on 1 devices) [05/09 00:35:14 d2.evaluation.coco_evaluation]: Preparing results for COCO format ... [05/09 00:35:14 d2.evaluation.coco_evaluation]: Saving results to ./output_evals/coco_instances_results.json [05/09 00:35:15 d2.evaluation.coco_evaluation]: Evaluating predictions with unofficial COCO API... Loading and preparing results... DONE (t=0.04s) creating index... index created! 
[05/09 00:35:15 d2.evaluation.fast_eval_api]: Evaluate annotation type *bbox* [05/09 00:35:16 d2.evaluation.fast_eval_api]: COCOeval_opt.evaluate() finished in 1.21 seconds. [05/09 00:35:16 d2.evaluation.fast_eval_api]: Accumulating evaluation results... [05/09 00:35:16 d2.evaluation.fast_eval_api]: COCOeval_opt.accumulate() finished in 0.12 seconds. Average Precision (AP) @[ IoU=0.50:0.95 | area= all | maxDets=100 ] = 0.121 Average Precision (AP) @[ IoU=0.50 | area= all | maxDets=100 ] = 0.192 Average Precision (AP) @[ IoU=0.75 | area= all | maxDets=100 ] = 0.134 Average Precision (AP) @[ IoU=0.50:0.95 | area= small | maxDets=100 ] = 0.500 Average Precision (AP) @[ IoU=0.50:0.95 | area=medium | maxDets=100 ] = 0.170 Average Precision (AP) @[ IoU=0.50:0.95 | area= large | maxDets=100 ] = 0.120 Average Recall (AR) @[ IoU=0.50:0.95 | area= all | maxDets= 1 ] = 0.121 Average Recall (AR) @[ IoU=0.50:0.95 | area= all | maxDets= 10 ] = 0.214 Average Recall (AR) @[ IoU=0.50:0.95 | area= all | maxDets=100 ] = 0.215 Average Recall (AR) @[ IoU=0.50:0.95 | area= small | maxDets=100 ] = 0.500 Average Recall (AR) @[ IoU=0.50:0.95 | area=medium | maxDets=100 ] = 0.293 Average Recall (AR) @[ IoU=0.50:0.95 | area= large | maxDets=100 ] = 0.215 [05/09 00:35:16 d2.evaluation.coco_evaluation]: Evaluation results for bbox: | AP | AP50 | AP75 | APs | APm | APl | |:------:|:------:|:------:|:------:|:------:|:------:| | 12.055 | 19.171 | 13.449 | 50.000 | 16.957 | 11.992 | [05/09 00:35:16 d2.evaluation.coco_evaluation]: Per-category bbox AP: | category | AP | category | AP | category | AP | |:--------------|:-------|:-----------|:------|:-----------|:-------| | hold | 40.045 | carry | 0.000 | point | 0.000 | | eat | 1.653 | drink | 0.000 | stand | 42.685 | | talk_on_phone | 0.000 | | | | | Loading and preparing results... DONE (t=0.36s) creating index... index created! 
[05/09 00:35:17 d2.evaluation.fast_eval_api]: Evaluate annotation type *segm* [05/09 00:35:18 d2.evaluation.fast_eval_api]: COCOeval_opt.evaluate() finished in 1.44 seconds. [05/09 00:35:18 d2.evaluation.fast_eval_api]: Accumulating evaluation results... [05/09 00:35:19 d2.evaluation.fast_eval_api]: COCOeval_opt.accumulate() finished in 0.12 seconds. Average Precision (AP) @[ IoU=0.50:0.95 | area= all | maxDets=100 ] = 0.112 Average Precision (AP) @[ IoU=0.50 | area= all | maxDets=100 ] = 0.186 Average Precision (AP) @[ IoU=0.75 | area= all | maxDets=100 ] = 0.126 Average Precision (AP) @[ IoU=0.50:0.95 | area= small | maxDets=100 ] = 0.033 Average Precision (AP) @[ IoU=0.50:0.95 | area=medium | maxDets=100 ] = 0.120 Average Precision (AP) @[ IoU=0.50:0.95 | area= large | maxDets=100 ] = 0.125 Average Recall (AR) @[ IoU=0.50:0.95 | area= all | maxDets= 1 ] = 0.118 Average Recall (AR) @[ IoU=0.50:0.95 | area= all | maxDets= 10 ] = 0.209 Average Recall (AR) @[ IoU=0.50:0.95 | area= all | maxDets=100 ] = 0.210 Average Recall (AR) @[ IoU=0.50:0.95 | area= small | maxDets=100 ] = 0.400 Average Recall (AR) @[ IoU=0.50:0.95 | area=medium | maxDets=100 ] = 0.282 Average Recall (AR) @[ IoU=0.50:0.95 | area= large | maxDets=100 ] = 0.212 [05/09 00:35:19 d2.evaluation.coco_evaluation]: Evaluation results for segm: | AP | AP50 | AP75 | APs | APm | APl | |:------:|:------:|:------:|:-----:|:------:|:------:| | 11.216 | 18.607 | 12.625 | 3.333 | 12.010 | 12.547 | [05/09 00:35:19 d2.evaluation.coco_evaluation]: Per-category segm AP: | category | AP | category | AP | category | AP | |:--------------|:-------|:-----------|:------|:-----------|:-------| | hold | 34.787 | carry | 0.000 | point | 0.000 | | eat | 2.034 | drink | 0.000 | stand | 41.690 | | talk_on_phone | 0.000 | | | | |
OrderedDict([('bbox',
{'AP': 12.054746090751948,
'AP-carry': 0.0,
'AP-drink': 0.0,
'AP-eat': 1.653387355812205,
'AP-hold': 40.04496914199544,
'AP-point': 0.0,
'AP-stand': 42.684866137455984,
'AP-talk_on_phone': 0.0,
'AP50': 19.171166864176435,
'AP75': 13.44908209568981,
'APl': 11.992233646442443,
'APm': 16.956725039905564,
'APs': 50.0}),
('segm',
{'AP': 11.215705604418499,
'AP-carry': 0.0,
'AP-drink': 0.0,
'AP-eat': 2.033828393624494,
'AP-hold': 34.786599491454645,
'AP-point': 0.0,
'AP-stand': 41.68951134585036,
'AP-talk_on_phone': 0.0,
'AP50': 18.607357993512416,
'AP75': 12.624930670282072,
'APl': 12.54704110216769,
'APm': 12.010120116297184,
'APs': 3.3333333333333326})])
COCO-Detection/faster_rcnn_R_50_FPN_3x.yaml
from detectron2.engine import DefaultTrainer
from detectron2.config import get_cfg
import os
from detectron2 import model_zoo

# Fine-tune a COCO-pretrained Faster R-CNN (R50-FPN, 3x schedule) on the
# custom action-classes dataset, saving checkpoints to a dedicated output dir.
cfg = get_cfg()
cfg.merge_from_file(model_zoo.get_config_file("COCO-Detection/faster_rcnn_R_50_FPN_3x.yaml"))
cfg.DATASETS.TRAIN = ("rel_actions1",)
cfg.DATASETS.TEST = ()  # no metrics implemented for this dataset
cfg.DATALOADER.NUM_WORKERS = 2
cfg.MODEL.WEIGHTS = model_zoo.get_checkpoint_url("COCO-Detection/faster_rcnn_R_50_FPN_3x.yaml")  # initialize from model zoo
cfg.SOLVER.IMS_PER_BATCH = 2
cfg.SOLVER.BASE_LR = 0.002
cfg.SOLVER.MAX_ITER = 1000  # short schedule; no LR decay steps configured
cfg.MODEL.ROI_HEADS.BATCH_SIZE_PER_IMAGE = 128  # faster, and good enough for this toy dataset
# Head is resized to the number of action classes; `relevant_classes` is
# defined in an earlier notebook cell — presumably the v-coco action list.
cfg.MODEL.ROI_HEADS.NUM_CLASSES = len(relevant_classes)  # all relevant classes
cfg.OUTPUT_DIR = "./outputs/action_only_detector"
os.makedirs(cfg.OUTPUT_DIR, exist_ok=True)
trainer = DefaultTrainer(cfg)
trainer.resume_or_load(resume=False)  # resume=False: always start from the model-zoo weights
trainer.train()
[05/09 00:48:15 d2.engine.defaults]: Model: GeneralizedRCNN( (backbone): FPN( (fpn_lateral2): Conv2d(256, 256, kernel_size=(1, 1), stride=(1, 1)) (fpn_output2): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)) (fpn_lateral3): Conv2d(512, 256, kernel_size=(1, 1), stride=(1, 1)) (fpn_output3): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)) (fpn_lateral4): Conv2d(1024, 256, kernel_size=(1, 1), stride=(1, 1)) (fpn_output4): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)) (fpn_lateral5): Conv2d(2048, 256, kernel_size=(1, 1), stride=(1, 1)) (fpn_output5): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)) (top_block): LastLevelMaxPool() (bottom_up): ResNet( (stem): BasicStem( (conv1): Conv2d( 3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False (norm): FrozenBatchNorm2d(num_features=64, eps=1e-05) ) ) (res2): Sequential( (0): BottleneckBlock( (shortcut): Conv2d( 64, 256, kernel_size=(1, 1), stride=(1, 1), bias=False (norm): FrozenBatchNorm2d(num_features=256, eps=1e-05) ) (conv1): Conv2d( 64, 64, kernel_size=(1, 1), stride=(1, 1), bias=False (norm): FrozenBatchNorm2d(num_features=64, eps=1e-05) ) (conv2): Conv2d( 64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False (norm): FrozenBatchNorm2d(num_features=64, eps=1e-05) ) (conv3): Conv2d( 64, 256, kernel_size=(1, 1), stride=(1, 1), bias=False (norm): FrozenBatchNorm2d(num_features=256, eps=1e-05) ) ) (1): BottleneckBlock( (conv1): Conv2d( 256, 64, kernel_size=(1, 1), stride=(1, 1), bias=False (norm): FrozenBatchNorm2d(num_features=64, eps=1e-05) ) (conv2): Conv2d( 64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False (norm): FrozenBatchNorm2d(num_features=64, eps=1e-05) ) (conv3): Conv2d( 64, 256, kernel_size=(1, 1), stride=(1, 1), bias=False (norm): FrozenBatchNorm2d(num_features=256, eps=1e-05) ) ) (2): BottleneckBlock( (conv1): Conv2d( 256, 64, kernel_size=(1, 1), stride=(1, 1), bias=False 
(norm): FrozenBatchNorm2d(num_features=64, eps=1e-05) ) (conv2): Conv2d( 64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False (norm): FrozenBatchNorm2d(num_features=64, eps=1e-05) ) (conv3): Conv2d( 64, 256, kernel_size=(1, 1), stride=(1, 1), bias=False (norm): FrozenBatchNorm2d(num_features=256, eps=1e-05) ) ) ) (res3): Sequential( (0): BottleneckBlock( (shortcut): Conv2d( 256, 512, kernel_size=(1, 1), stride=(2, 2), bias=False (norm): FrozenBatchNorm2d(num_features=512, eps=1e-05) ) (conv1): Conv2d( 256, 128, kernel_size=(1, 1), stride=(2, 2), bias=False (norm): FrozenBatchNorm2d(num_features=128, eps=1e-05) ) (conv2): Conv2d( 128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False (norm): FrozenBatchNorm2d(num_features=128, eps=1e-05) ) (conv3): Conv2d( 128, 512, kernel_size=(1, 1), stride=(1, 1), bias=False (norm): FrozenBatchNorm2d(num_features=512, eps=1e-05) ) ) (1): BottleneckBlock( (conv1): Conv2d( 512, 128, kernel_size=(1, 1), stride=(1, 1), bias=False (norm): FrozenBatchNorm2d(num_features=128, eps=1e-05) ) (conv2): Conv2d( 128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False (norm): FrozenBatchNorm2d(num_features=128, eps=1e-05) ) (conv3): Conv2d( 128, 512, kernel_size=(1, 1), stride=(1, 1), bias=False (norm): FrozenBatchNorm2d(num_features=512, eps=1e-05) ) ) (2): BottleneckBlock( (conv1): Conv2d( 512, 128, kernel_size=(1, 1), stride=(1, 1), bias=False (norm): FrozenBatchNorm2d(num_features=128, eps=1e-05) ) (conv2): Conv2d( 128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False (norm): FrozenBatchNorm2d(num_features=128, eps=1e-05) ) (conv3): Conv2d( 128, 512, kernel_size=(1, 1), stride=(1, 1), bias=False (norm): FrozenBatchNorm2d(num_features=512, eps=1e-05) ) ) (3): BottleneckBlock( (conv1): Conv2d( 512, 128, kernel_size=(1, 1), stride=(1, 1), bias=False (norm): FrozenBatchNorm2d(num_features=128, eps=1e-05) ) (conv2): Conv2d( 128, 128, kernel_size=(3, 3), stride=(1, 1), 
padding=(1, 1), bias=False (norm): FrozenBatchNorm2d(num_features=128, eps=1e-05) ) (conv3): Conv2d( 128, 512, kernel_size=(1, 1), stride=(1, 1), bias=False (norm): FrozenBatchNorm2d(num_features=512, eps=1e-05) ) ) ) (res4): Sequential( (0): BottleneckBlock( (shortcut): Conv2d( 512, 1024, kernel_size=(1, 1), stride=(2, 2), bias=False (norm): FrozenBatchNorm2d(num_features=1024, eps=1e-05) ) (conv1): Conv2d( 512, 256, kernel_size=(1, 1), stride=(2, 2), bias=False (norm): FrozenBatchNorm2d(num_features=256, eps=1e-05) ) (conv2): Conv2d( 256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False (norm): FrozenBatchNorm2d(num_features=256, eps=1e-05) ) (conv3): Conv2d( 256, 1024, kernel_size=(1, 1), stride=(1, 1), bias=False (norm): FrozenBatchNorm2d(num_features=1024, eps=1e-05) ) ) (1): BottleneckBlock( (conv1): Conv2d( 1024, 256, kernel_size=(1, 1), stride=(1, 1), bias=False (norm): FrozenBatchNorm2d(num_features=256, eps=1e-05) ) (conv2): Conv2d( 256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False (norm): FrozenBatchNorm2d(num_features=256, eps=1e-05) ) (conv3): Conv2d( 256, 1024, kernel_size=(1, 1), stride=(1, 1), bias=False (norm): FrozenBatchNorm2d(num_features=1024, eps=1e-05) ) ) (2): BottleneckBlock( (conv1): Conv2d( 1024, 256, kernel_size=(1, 1), stride=(1, 1), bias=False (norm): FrozenBatchNorm2d(num_features=256, eps=1e-05) ) (conv2): Conv2d( 256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False (norm): FrozenBatchNorm2d(num_features=256, eps=1e-05) ) (conv3): Conv2d( 256, 1024, kernel_size=(1, 1), stride=(1, 1), bias=False (norm): FrozenBatchNorm2d(num_features=1024, eps=1e-05) ) ) (3): BottleneckBlock( (conv1): Conv2d( 1024, 256, kernel_size=(1, 1), stride=(1, 1), bias=False (norm): FrozenBatchNorm2d(num_features=256, eps=1e-05) ) (conv2): Conv2d( 256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False (norm): FrozenBatchNorm2d(num_features=256, eps=1e-05) ) (conv3): Conv2d( 256, 1024, 
kernel_size=(1, 1), stride=(1, 1), bias=False (norm): FrozenBatchNorm2d(num_features=1024, eps=1e-05) ) ) (4): BottleneckBlock( (conv1): Conv2d( 1024, 256, kernel_size=(1, 1), stride=(1, 1), bias=False (norm): FrozenBatchNorm2d(num_features=256, eps=1e-05) ) (conv2): Conv2d( 256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False (norm): FrozenBatchNorm2d(num_features=256, eps=1e-05) ) (conv3): Conv2d( 256, 1024, kernel_size=(1, 1), stride=(1, 1), bias=False (norm): FrozenBatchNorm2d(num_features=1024, eps=1e-05) ) ) (5): BottleneckBlock( (conv1): Conv2d( 1024, 256, kernel_size=(1, 1), stride=(1, 1), bias=False (norm): FrozenBatchNorm2d(num_features=256, eps=1e-05) ) (conv2): Conv2d( 256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False (norm): FrozenBatchNorm2d(num_features=256, eps=1e-05) ) (conv3): Conv2d( 256, 1024, kernel_size=(1, 1), stride=(1, 1), bias=False (norm): FrozenBatchNorm2d(num_features=1024, eps=1e-05) ) ) ) (res5): Sequential( (0): BottleneckBlock( (shortcut): Conv2d( 1024, 2048, kernel_size=(1, 1), stride=(2, 2), bias=False (norm): FrozenBatchNorm2d(num_features=2048, eps=1e-05) ) (conv1): Conv2d( 1024, 512, kernel_size=(1, 1), stride=(2, 2), bias=False (norm): FrozenBatchNorm2d(num_features=512, eps=1e-05) ) (conv2): Conv2d( 512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False (norm): FrozenBatchNorm2d(num_features=512, eps=1e-05) ) (conv3): Conv2d( 512, 2048, kernel_size=(1, 1), stride=(1, 1), bias=False (norm): FrozenBatchNorm2d(num_features=2048, eps=1e-05) ) ) (1): BottleneckBlock( (conv1): Conv2d( 2048, 512, kernel_size=(1, 1), stride=(1, 1), bias=False (norm): FrozenBatchNorm2d(num_features=512, eps=1e-05) ) (conv2): Conv2d( 512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False (norm): FrozenBatchNorm2d(num_features=512, eps=1e-05) ) (conv3): Conv2d( 512, 2048, kernel_size=(1, 1), stride=(1, 1), bias=False (norm): FrozenBatchNorm2d(num_features=2048, eps=1e-05) ) ) 
(2): BottleneckBlock( (conv1): Conv2d( 2048, 512, kernel_size=(1, 1), stride=(1, 1), bias=False (norm): FrozenBatchNorm2d(num_features=512, eps=1e-05) ) (conv2): Conv2d( 512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False (norm): FrozenBatchNorm2d(num_features=512, eps=1e-05) ) (conv3): Conv2d( 512, 2048, kernel_size=(1, 1), stride=(1, 1), bias=False (norm): FrozenBatchNorm2d(num_features=2048, eps=1e-05) ) ) ) ) ) (proposal_generator): RPN( (rpn_head): StandardRPNHead( (conv): Conv2d( 256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1) (activation): ReLU() ) (objectness_logits): Conv2d(256, 3, kernel_size=(1, 1), stride=(1, 1)) (anchor_deltas): Conv2d(256, 12, kernel_size=(1, 1), stride=(1, 1)) ) (anchor_generator): DefaultAnchorGenerator( (cell_anchors): BufferList() ) ) (roi_heads): StandardROIHeads( (box_pooler): ROIPooler( (level_poolers): ModuleList( (0): ROIAlign(output_size=(7, 7), spatial_scale=0.25, sampling_ratio=0, aligned=True) (1): ROIAlign(output_size=(7, 7), spatial_scale=0.125, sampling_ratio=0, aligned=True) (2): ROIAlign(output_size=(7, 7), spatial_scale=0.0625, sampling_ratio=0, aligned=True) (3): ROIAlign(output_size=(7, 7), spatial_scale=0.03125, sampling_ratio=0, aligned=True) ) ) (box_head): FastRCNNConvFCHead( (flatten): Flatten(start_dim=1, end_dim=-1) (fc1): Linear(in_features=12544, out_features=1024, bias=True) (fc_relu1): ReLU() (fc2): Linear(in_features=1024, out_features=1024, bias=True) (fc_relu2): ReLU() ) (box_predictor): FastRCNNOutputLayers( (cls_score): Linear(in_features=1024, out_features=8, bias=True) (bbox_pred): Linear(in_features=1024, out_features=28, bias=True) ) ) ) [05/09 00:48:15 d2.data.datasets.coco]: Loaded 1951 images in COCO format from ./data/instances_vcoco_actions_train.json [05/09 00:48:15 d2.data.build]: Removed 0 images with no usable annotations. 1951 images left. 
[05/09 00:48:15 d2.data.dataset_mapper]: [DatasetMapper] Augmentations used in training: [ResizeShortestEdge(short_edge_length=(640, 672, 704, 736, 768, 800), max_size=1333, sample_style='choice'), RandomFlip()] [05/09 00:48:15 d2.data.build]: Using training sampler TrainingSampler [05/09 00:48:15 d2.data.common]: Serializing 1951 elements to byte tensors and concatenating them all ... [05/09 00:48:15 d2.data.common]: Serialized dataset takes 4.90 MiB WARNING [05/09 00:48:15 d2.solver.build]: SOLVER.STEPS contains values larger than SOLVER.MAX_ITER. These values will be ignored.
model_final_280758.pkl: 167MB [00:20, 8.06MB/s] Skip loading parameter 'roi_heads.box_predictor.cls_score.weight' to the model due to incompatible shapes: (81, 1024) in the checkpoint but (8, 1024) in the model! You might want to double check if this is expected. Skip loading parameter 'roi_heads.box_predictor.cls_score.bias' to the model due to incompatible shapes: (81,) in the checkpoint but (8,) in the model! You might want to double check if this is expected. Skip loading parameter 'roi_heads.box_predictor.bbox_pred.weight' to the model due to incompatible shapes: (320, 1024) in the checkpoint but (28, 1024) in the model! You might want to double check if this is expected. Skip loading parameter 'roi_heads.box_predictor.bbox_pred.bias' to the model due to incompatible shapes: (320,) in the checkpoint but (28,) in the model! You might want to double check if this is expected. Some model parameters or buffers are not found in the checkpoint: roi_heads.box_predictor.bbox_pred.{bias, weight} roi_heads.box_predictor.cls_score.{bias, weight}
[05/09 00:48:36 d2.engine.train_loop]: Starting training from iteration 0 [05/09 00:48:42 d2.utils.events]: eta: 0:04:03 iter: 19 total_loss: 2.62 loss_cls: 2.017 loss_box_reg: 0.5686 loss_rpn_cls: 0.0201 loss_rpn_loc: 0.00475 time: 0.2523 data_time: 0.0253 lr: 3.9962e-05 max_mem: 2758M [05/09 00:48:47 d2.utils.events]: eta: 0:04:04 iter: 39 total_loss: 2.04 loss_cls: 1.369 loss_box_reg: 0.6559 loss_rpn_cls: 0.02173 loss_rpn_loc: 0.00415 time: 0.2630 data_time: 0.0128 lr: 7.9922e-05 max_mem: 2806M [05/09 00:48:53 d2.utils.events]: eta: 0:04:03 iter: 59 total_loss: 1.361 loss_cls: 0.7126 loss_box_reg: 0.5817 loss_rpn_cls: 0.01871 loss_rpn_loc: 0.005755 time: 0.2667 data_time: 0.0162 lr: 0.00011988 max_mem: 2806M [05/09 00:48:58 d2.utils.events]: eta: 0:04:00 iter: 79 total_loss: 1.053 loss_cls: 0.4737 loss_box_reg: 0.5136 loss_rpn_cls: 0.01666 loss_rpn_loc: 0.003539 time: 0.2644 data_time: 0.0147 lr: 0.00015984 max_mem: 2806M [05/09 00:49:04 d2.utils.events]: eta: 0:03:59 iter: 99 total_loss: 1.024 loss_cls: 0.4274 loss_box_reg: 0.5611 loss_rpn_cls: 0.01437 loss_rpn_loc: 0.004409 time: 0.2771 data_time: 0.0127 lr: 0.0001998 max_mem: 2895M [05/09 00:49:10 d2.utils.events]: eta: 0:03:54 iter: 119 total_loss: 1.034 loss_cls: 0.4182 loss_box_reg: 0.5902 loss_rpn_cls: 0.01133 loss_rpn_loc: 0.004913 time: 0.2783 data_time: 0.0126 lr: 0.00023976 max_mem: 2895M [05/09 00:49:15 d2.utils.events]: eta: 0:03:46 iter: 139 total_loss: 0.9363 loss_cls: 0.3477 loss_box_reg: 0.5706 loss_rpn_cls: 0.01248 loss_rpn_loc: 0.003268 time: 0.2744 data_time: 0.0135 lr: 0.00027972 max_mem: 2895M [05/09 00:49:21 d2.utils.events]: eta: 0:03:43 iter: 159 total_loss: 0.7916 loss_cls: 0.285 loss_box_reg: 0.4965 loss_rpn_cls: 0.005246 loss_rpn_loc: 0.005301 time: 0.2744 data_time: 0.0148 lr: 0.00031968 max_mem: 2895M [05/09 00:49:26 d2.utils.events]: eta: 0:03:38 iter: 179 total_loss: 0.8213 loss_cls: 0.2753 loss_box_reg: 0.5065 loss_rpn_cls: 0.005218 loss_rpn_loc: 0.005492 time: 0.2733 data_time: 
0.0123 lr: 0.00035964 max_mem: 2895M [05/09 00:49:31 d2.utils.events]: eta: 0:03:32 iter: 199 total_loss: 0.5861 loss_cls: 0.1765 loss_box_reg: 0.3641 loss_rpn_cls: 0.00506 loss_rpn_loc: 0.005295 time: 0.2726 data_time: 0.0107 lr: 0.0003996 max_mem: 2895M [05/09 00:49:37 d2.utils.events]: eta: 0:03:27 iter: 219 total_loss: 0.6405 loss_cls: 0.2402 loss_box_reg: 0.3636 loss_rpn_cls: 0.003551 loss_rpn_loc: 0.005702 time: 0.2724 data_time: 0.0153 lr: 0.00043956 max_mem: 2895M [05/09 00:49:42 d2.utils.events]: eta: 0:03:21 iter: 239 total_loss: 0.5892 loss_cls: 0.2492 loss_box_reg: 0.3051 loss_rpn_cls: 0.004994 loss_rpn_loc: 0.007571 time: 0.2719 data_time: 0.0121 lr: 0.00047952 max_mem: 2895M [05/09 00:49:47 d2.utils.events]: eta: 0:03:14 iter: 259 total_loss: 0.506 loss_cls: 0.2352 loss_box_reg: 0.2684 loss_rpn_cls: 0.003406 loss_rpn_loc: 0.006904 time: 0.2704 data_time: 0.0128 lr: 0.00051948 max_mem: 2895M [05/09 00:49:52 d2.utils.events]: eta: 0:03:09 iter: 279 total_loss: 0.5366 loss_cls: 0.2182 loss_box_reg: 0.2895 loss_rpn_cls: 0.004898 loss_rpn_loc: 0.00624 time: 0.2697 data_time: 0.0141 lr: 0.00055944 max_mem: 2895M [05/09 00:49:58 d2.utils.events]: eta: 0:03:04 iter: 299 total_loss: 0.3997 loss_cls: 0.1956 loss_box_reg: 0.1964 loss_rpn_cls: 0.0007925 loss_rpn_loc: 0.006658 time: 0.2694 data_time: 0.0117 lr: 0.0005994 max_mem: 2895M [05/09 00:50:03 d2.utils.events]: eta: 0:02:58 iter: 319 total_loss: 0.5477 loss_cls: 0.2752 loss_box_reg: 0.2546 loss_rpn_cls: 0.002897 loss_rpn_loc: 0.005303 time: 0.2688 data_time: 0.0124 lr: 0.00063936 max_mem: 2895M [05/09 00:50:08 d2.utils.events]: eta: 0:02:54 iter: 339 total_loss: 0.4718 loss_cls: 0.2473 loss_box_reg: 0.2481 loss_rpn_cls: 0.003662 loss_rpn_loc: 0.004988 time: 0.2694 data_time: 0.0141 lr: 0.00067932 max_mem: 2895M [05/09 00:50:14 d2.utils.events]: eta: 0:02:49 iter: 359 total_loss: 0.4421 loss_cls: 0.208 loss_box_reg: 0.2091 loss_rpn_cls: 0.001212 loss_rpn_loc: 0.004857 time: 0.2693 data_time: 0.0143 lr: 
0.00071928 max_mem: 2895M [05/09 00:50:19 d2.utils.events]: eta: 0:02:43 iter: 379 total_loss: 0.5215 loss_cls: 0.2466 loss_box_reg: 0.2727 loss_rpn_cls: 0.005805 loss_rpn_loc: 0.006626 time: 0.2691 data_time: 0.0108 lr: 0.00075924 max_mem: 2895M [05/09 00:50:24 d2.utils.events]: eta: 0:02:39 iter: 399 total_loss: 0.4769 loss_cls: 0.2007 loss_box_reg: 0.257 loss_rpn_cls: 0.001677 loss_rpn_loc: 0.005929 time: 0.2689 data_time: 0.0095 lr: 0.0007992 max_mem: 2895M [05/09 00:50:30 d2.utils.events]: eta: 0:02:34 iter: 419 total_loss: 0.4454 loss_cls: 0.2009 loss_box_reg: 0.2315 loss_rpn_cls: 0.001209 loss_rpn_loc: 0.006166 time: 0.2692 data_time: 0.0124 lr: 0.00083916 max_mem: 2895M [05/09 00:50:35 d2.utils.events]: eta: 0:02:28 iter: 439 total_loss: 0.4667 loss_cls: 0.1965 loss_box_reg: 0.2467 loss_rpn_cls: 0.00139 loss_rpn_loc: 0.006625 time: 0.2689 data_time: 0.0124 lr: 0.00087912 max_mem: 2895M [05/09 00:50:41 d2.utils.events]: eta: 0:02:23 iter: 459 total_loss: 0.4424 loss_cls: 0.2092 loss_box_reg: 0.2734 loss_rpn_cls: 0.0009896 loss_rpn_loc: 0.005665 time: 0.2689 data_time: 0.0115 lr: 0.00091908 max_mem: 2895M [05/09 00:50:46 d2.utils.events]: eta: 0:02:17 iter: 479 total_loss: 0.4396 loss_cls: 0.2091 loss_box_reg: 0.2079 loss_rpn_cls: 0.002499 loss_rpn_loc: 0.007423 time: 0.2685 data_time: 0.0109 lr: 0.00095904 max_mem: 2895M [05/09 00:50:51 d2.utils.events]: eta: 0:02:12 iter: 499 total_loss: 0.4761 loss_cls: 0.215 loss_box_reg: 0.2123 loss_rpn_cls: 0.000724 loss_rpn_loc: 0.00729 time: 0.2684 data_time: 0.0127 lr: 0.000999 max_mem: 2895M [05/09 00:50:56 d2.utils.events]: eta: 0:02:07 iter: 519 total_loss: 0.4595 loss_cls: 0.2071 loss_box_reg: 0.2559 loss_rpn_cls: 0.0009723 loss_rpn_loc: 0.006114 time: 0.2683 data_time: 0.0121 lr: 0.001039 max_mem: 2895M [05/09 00:51:02 d2.utils.events]: eta: 0:02:01 iter: 539 total_loss: 0.4348 loss_cls: 0.1874 loss_box_reg: 0.2136 loss_rpn_cls: 0.003308 loss_rpn_loc: 0.005711 time: 0.2681 data_time: 0.0160 lr: 0.0010789 
max_mem: 2895M [05/09 00:51:07 d2.utils.events]: eta: 0:01:56 iter: 559 total_loss: 0.4391 loss_cls: 0.2033 loss_box_reg: 0.1957 loss_rpn_cls: 0.00218 loss_rpn_loc: 0.004848 time: 0.2682 data_time: 0.0107 lr: 0.0011189 max_mem: 2895M [05/09 00:51:13 d2.utils.events]: eta: 0:01:51 iter: 579 total_loss: 0.4409 loss_cls: 0.223 loss_box_reg: 0.2327 loss_rpn_cls: 0.0008134 loss_rpn_loc: 0.005215 time: 0.2681 data_time: 0.0130 lr: 0.0011588 max_mem: 2895M [05/09 00:51:18 d2.utils.events]: eta: 0:01:46 iter: 599 total_loss: 0.3866 loss_cls: 0.1817 loss_box_reg: 0.1796 loss_rpn_cls: 0.001697 loss_rpn_loc: 0.007755 time: 0.2682 data_time: 0.0109 lr: 0.0011988 max_mem: 2895M [05/09 00:51:23 d2.utils.events]: eta: 0:01:40 iter: 619 total_loss: 0.4026 loss_cls: 0.1602 loss_box_reg: 0.2436 loss_rpn_cls: 0.002327 loss_rpn_loc: 0.005093 time: 0.2682 data_time: 0.0144 lr: 0.0012388 max_mem: 2895M [05/09 00:51:29 d2.utils.events]: eta: 0:01:35 iter: 639 total_loss: 0.372 loss_cls: 0.1553 loss_box_reg: 0.1873 loss_rpn_cls: 0.003113 loss_rpn_loc: 0.00546 time: 0.2680 data_time: 0.0117 lr: 0.0012787 max_mem: 2895M [05/09 00:51:34 d2.utils.events]: eta: 0:01:30 iter: 659 total_loss: 0.4043 loss_cls: 0.2116 loss_box_reg: 0.2117 loss_rpn_cls: 0.0008684 loss_rpn_loc: 0.005729 time: 0.2680 data_time: 0.0121 lr: 0.0013187 max_mem: 2895M [05/09 00:51:39 d2.utils.events]: eta: 0:01:24 iter: 679 total_loss: 0.5002 loss_cls: 0.2232 loss_box_reg: 0.2555 loss_rpn_cls: 0.001154 loss_rpn_loc: 0.005423 time: 0.2681 data_time: 0.0100 lr: 0.0013586 max_mem: 2895M [05/09 00:51:45 d2.utils.events]: eta: 0:01:19 iter: 699 total_loss: 0.392 loss_cls: 0.1622 loss_box_reg: 0.2408 loss_rpn_cls: 0.0014 loss_rpn_loc: 0.005848 time: 0.2681 data_time: 0.0141 lr: 0.0013986 max_mem: 2895M [05/09 00:51:50 d2.utils.events]: eta: 0:01:14 iter: 719 total_loss: 0.4301 loss_cls: 0.2027 loss_box_reg: 0.2085 loss_rpn_cls: 0.001928 loss_rpn_loc: 0.006883 time: 0.2678 data_time: 0.0123 lr: 0.0014386 max_mem: 2895M [05/09 
00:51:55 d2.utils.events]: eta: 0:01:08 iter: 739 total_loss: 0.4679 loss_cls: 0.228 loss_box_reg: 0.2122 loss_rpn_cls: 0.001007 loss_rpn_loc: 0.005277 time: 0.2680 data_time: 0.0140 lr: 0.0014785 max_mem: 2895M [05/09 00:52:01 d2.utils.events]: eta: 0:01:03 iter: 759 total_loss: 0.3661 loss_cls: 0.1746 loss_box_reg: 0.1698 loss_rpn_cls: 0.00128 loss_rpn_loc: 0.007702 time: 0.2679 data_time: 0.0115 lr: 0.0015185 max_mem: 2895M [05/09 00:52:06 d2.utils.events]: eta: 0:00:58 iter: 779 total_loss: 0.4373 loss_cls: 0.1957 loss_box_reg: 0.243 loss_rpn_cls: 0.001587 loss_rpn_loc: 0.004933 time: 0.2679 data_time: 0.0118 lr: 0.0015584 max_mem: 2895M [05/09 00:52:11 d2.utils.events]: eta: 0:00:52 iter: 799 total_loss: 0.4411 loss_cls: 0.2009 loss_box_reg: 0.2033 loss_rpn_cls: 0.0007105 loss_rpn_loc: 0.004162 time: 0.2678 data_time: 0.0112 lr: 0.0015984 max_mem: 2895M [05/09 00:52:17 d2.utils.events]: eta: 0:00:47 iter: 819 total_loss: 0.4095 loss_cls: 0.1617 loss_box_reg: 0.2326 loss_rpn_cls: 0.003482 loss_rpn_loc: 0.003971 time: 0.2676 data_time: 0.0136 lr: 0.0016384 max_mem: 2895M [05/09 00:52:22 d2.utils.events]: eta: 0:00:42 iter: 839 total_loss: 0.3875 loss_cls: 0.206 loss_box_reg: 0.1927 loss_rpn_cls: 0.001513 loss_rpn_loc: 0.004942 time: 0.2675 data_time: 0.0136 lr: 0.0016783 max_mem: 2895M [05/09 00:52:27 d2.utils.events]: eta: 0:00:37 iter: 859 total_loss: 0.3919 loss_cls: 0.1638 loss_box_reg: 0.2097 loss_rpn_cls: 0.0009756 loss_rpn_loc: 0.005418 time: 0.2675 data_time: 0.0126 lr: 0.0017183 max_mem: 2895M [05/09 00:52:33 d2.utils.events]: eta: 0:00:31 iter: 879 total_loss: 0.5211 loss_cls: 0.2122 loss_box_reg: 0.2564 loss_rpn_cls: 0.002488 loss_rpn_loc: 0.006843 time: 0.2675 data_time: 0.0119 lr: 0.0017582 max_mem: 2895M [05/09 00:52:38 d2.utils.events]: eta: 0:00:26 iter: 899 total_loss: 0.4934 loss_cls: 0.1712 loss_box_reg: 0.2678 loss_rpn_cls: 0.001097 loss_rpn_loc: 0.006459 time: 0.2676 data_time: 0.0116 lr: 0.0017982 max_mem: 2895M [05/09 00:52:43 
d2.utils.events]: eta: 0:00:21 iter: 919 total_loss: 0.4764 loss_cls: 0.1865 loss_box_reg: 0.2495 loss_rpn_cls: 0.001832 loss_rpn_loc: 0.006316 time: 0.2677 data_time: 0.0123 lr: 0.0018382 max_mem: 2895M [05/09 00:52:49 d2.utils.events]: eta: 0:00:15 iter: 939 total_loss: 0.5045 loss_cls: 0.2194 loss_box_reg: 0.2934 loss_rpn_cls: 0.002098 loss_rpn_loc: 0.006287 time: 0.2675 data_time: 0.0109 lr: 0.0018781 max_mem: 2895M [05/09 00:52:54 d2.utils.events]: eta: 0:00:10 iter: 959 total_loss: 0.4933 loss_cls: 0.213 loss_box_reg: 0.2692 loss_rpn_cls: 0.002477 loss_rpn_loc: 0.007896 time: 0.2674 data_time: 0.0132 lr: 0.0019181 max_mem: 2895M [05/09 00:52:59 d2.utils.events]: eta: 0:00:05 iter: 979 total_loss: 0.4543 loss_cls: 0.2021 loss_box_reg: 0.2715 loss_rpn_cls: 0.001956 loss_rpn_loc: 0.005839 time: 0.2673 data_time: 0.0134 lr: 0.001958 max_mem: 2895M [05/09 00:53:06 d2.utils.events]: eta: 0:00:00 iter: 999 total_loss: 0.4072 loss_cls: 0.1676 loss_box_reg: 0.2348 loss_rpn_cls: 0.001137 loss_rpn_loc: 0.005125 time: 0.2675 data_time: 0.0122 lr: 0.001998 max_mem: 2895M [05/09 00:53:06 d2.engine.hooks]: Overall training speed: 998 iterations in 0:04:26 (0.2675 s / it) [05/09 00:53:06 d2.engine.hooks]: Total training time: 0:04:29 (0:00:02 on hooks)
# `DefaultPredictor` is not imported anywhere in this file's visible scope
# (only DefaultTrainer is) — import it explicitly so this cell is
# self-contained; harmless if an earlier cell already imported it.
from detectron2.engine import DefaultPredictor

# Point the config at the weights produced by the training run above.
cfg.MODEL.WEIGHTS = os.path.join(cfg.OUTPUT_DIR, "model_final.pth")
cfg.MODEL.ROI_HEADS.SCORE_THRESH_TEST = 0.5  # set the testing threshold for this model
cfg.DATASETS.TEST = ("rel_actions_val3", )
predictor = DefaultPredictor(cfg)
[05/09 00:53:21 d2.checkpoint.c2_model_loading]: Following weights matched with model:
| Names in Model | Names in Checkpoint | Shapes |
|:------------------------------------------------|:-----------------------------------------------------------------------------------------------------|:------------------------------------------------|
| backbone.bottom_up.res2.0.conv1.* | backbone.bottom_up.res2.0.conv1.{norm.bias,norm.running_mean,norm.running_var,norm.weight,weight} | (64,) (64,) (64,) (64,) (64,64,1,1) |
| backbone.bottom_up.res2.0.conv2.* | backbone.bottom_up.res2.0.conv2.{norm.bias,norm.running_mean,norm.running_var,norm.weight,weight} | (64,) (64,) (64,) (64,) (64,64,3,3) |
| backbone.bottom_up.res2.0.conv3.* | backbone.bottom_up.res2.0.conv3.{norm.bias,norm.running_mean,norm.running_var,norm.weight,weight} | (256,) (256,) (256,) (256,) (256,64,1,1) |
| backbone.bottom_up.res2.0.shortcut.* | backbone.bottom_up.res2.0.shortcut.{norm.bias,norm.running_mean,norm.running_var,norm.weight,weight} | (256,) (256,) (256,) (256,) (256,64,1,1) |
| backbone.bottom_up.res2.1.conv1.* | backbone.bottom_up.res2.1.conv1.{norm.bias,norm.running_mean,norm.running_var,norm.weight,weight} | (64,) (64,) (64,) (64,) (64,256,1,1) |
| backbone.bottom_up.res2.1.conv2.* | backbone.bottom_up.res2.1.conv2.{norm.bias,norm.running_mean,norm.running_var,norm.weight,weight} | (64,) (64,) (64,) (64,) (64,64,3,3) |
| backbone.bottom_up.res2.1.conv3.* | backbone.bottom_up.res2.1.conv3.{norm.bias,norm.running_mean,norm.running_var,norm.weight,weight} | (256,) (256,) (256,) (256,) (256,64,1,1) |
| backbone.bottom_up.res2.2.conv1.* | backbone.bottom_up.res2.2.conv1.{norm.bias,norm.running_mean,norm.running_var,norm.weight,weight} | (64,) (64,) (64,) (64,) (64,256,1,1) |
| backbone.bottom_up.res2.2.conv2.* | backbone.bottom_up.res2.2.conv2.{norm.bias,norm.running_mean,norm.running_var,norm.weight,weight} | (64,) (64,) (64,) (64,) (64,64,3,3) |
| backbone.bottom_up.res2.2.conv3.* | backbone.bottom_up.res2.2.conv3.{norm.bias,norm.running_mean,norm.running_var,norm.weight,weight} | (256,) (256,) (256,) (256,) (256,64,1,1) |
| backbone.bottom_up.res3.0.conv1.* | backbone.bottom_up.res3.0.conv1.{norm.bias,norm.running_mean,norm.running_var,norm.weight,weight} | (128,) (128,) (128,) (128,) (128,256,1,1) |
| backbone.bottom_up.res3.0.conv2.* | backbone.bottom_up.res3.0.conv2.{norm.bias,norm.running_mean,norm.running_var,norm.weight,weight} | (128,) (128,) (128,) (128,) (128,128,3,3) |
| backbone.bottom_up.res3.0.conv3.* | backbone.bottom_up.res3.0.conv3.{norm.bias,norm.running_mean,norm.running_var,norm.weight,weight} | (512,) (512,) (512,) (512,) (512,128,1,1) |
| backbone.bottom_up.res3.0.shortcut.* | backbone.bottom_up.res3.0.shortcut.{norm.bias,norm.running_mean,norm.running_var,norm.weight,weight} | (512,) (512,) (512,) (512,) (512,256,1,1) |
| backbone.bottom_up.res3.1.conv1.* | backbone.bottom_up.res3.1.conv1.{norm.bias,norm.running_mean,norm.running_var,norm.weight,weight} | (128,) (128,) (128,) (128,) (128,512,1,1) |
| backbone.bottom_up.res3.1.conv2.* | backbone.bottom_up.res3.1.conv2.{norm.bias,norm.running_mean,norm.running_var,norm.weight,weight} | (128,) (128,) (128,) (128,) (128,128,3,3) |
| backbone.bottom_up.res3.1.conv3.* | backbone.bottom_up.res3.1.conv3.{norm.bias,norm.running_mean,norm.running_var,norm.weight,weight} | (512,) (512,) (512,) (512,) (512,128,1,1) |
| backbone.bottom_up.res3.2.conv1.* | backbone.bottom_up.res3.2.conv1.{norm.bias,norm.running_mean,norm.running_var,norm.weight,weight} | (128,) (128,) (128,) (128,) (128,512,1,1) |
| backbone.bottom_up.res3.2.conv2.* | backbone.bottom_up.res3.2.conv2.{norm.bias,norm.running_mean,norm.running_var,norm.weight,weight} | (128,) (128,) (128,) (128,) (128,128,3,3) |
| backbone.bottom_up.res3.2.conv3.* | backbone.bottom_up.res3.2.conv3.{norm.bias,norm.running_mean,norm.running_var,norm.weight,weight} | (512,) (512,) (512,) (512,) (512,128,1,1) |
| backbone.bottom_up.res3.3.conv1.* | backbone.bottom_up.res3.3.conv1.{norm.bias,norm.running_mean,norm.running_var,norm.weight,weight} | (128,) (128,) (128,) (128,) (128,512,1,1) |
| backbone.bottom_up.res3.3.conv2.* | backbone.bottom_up.res3.3.conv2.{norm.bias,norm.running_mean,norm.running_var,norm.weight,weight} | (128,) (128,) (128,) (128,) (128,128,3,3) |
| backbone.bottom_up.res3.3.conv3.* | backbone.bottom_up.res3.3.conv3.{norm.bias,norm.running_mean,norm.running_var,norm.weight,weight} | (512,) (512,) (512,) (512,) (512,128,1,1) |
| backbone.bottom_up.res4.0.conv1.* | backbone.bottom_up.res4.0.conv1.{norm.bias,norm.running_mean,norm.running_var,norm.weight,weight} | (256,) (256,) (256,) (256,) (256,512,1,1) |
| backbone.bottom_up.res4.0.conv2.* | backbone.bottom_up.res4.0.conv2.{norm.bias,norm.running_mean,norm.running_var,norm.weight,weight} | (256,) (256,) (256,) (256,) (256,256,3,3) |
| backbone.bottom_up.res4.0.conv3.* | backbone.bottom_up.res4.0.conv3.{norm.bias,norm.running_mean,norm.running_var,norm.weight,weight} | (1024,) (1024,) (1024,) (1024,) (1024,256,1,1) |
| backbone.bottom_up.res4.0.shortcut.* | backbone.bottom_up.res4.0.shortcut.{norm.bias,norm.running_mean,norm.running_var,norm.weight,weight} | (1024,) (1024,) (1024,) (1024,) (1024,512,1,1) |
| backbone.bottom_up.res4.1.conv1.* | backbone.bottom_up.res4.1.conv1.{norm.bias,norm.running_mean,norm.running_var,norm.weight,weight} | (256,) (256,) (256,) (256,) (256,1024,1,1) |
| backbone.bottom_up.res4.1.conv2.* | backbone.bottom_up.res4.1.conv2.{norm.bias,norm.running_mean,norm.running_var,norm.weight,weight} | (256,) (256,) (256,) (256,) (256,256,3,3) |
| backbone.bottom_up.res4.1.conv3.* | backbone.bottom_up.res4.1.conv3.{norm.bias,norm.running_mean,norm.running_var,norm.weight,weight} | (1024,) (1024,) (1024,) (1024,) (1024,256,1,1) |
| backbone.bottom_up.res4.2.conv1.* | backbone.bottom_up.res4.2.conv1.{norm.bias,norm.running_mean,norm.running_var,norm.weight,weight} | (256,) (256,) (256,) (256,) (256,1024,1,1) |
| backbone.bottom_up.res4.2.conv2.* | backbone.bottom_up.res4.2.conv2.{norm.bias,norm.running_mean,norm.running_var,norm.weight,weight} | (256,) (256,) (256,) (256,) (256,256,3,3) |
| backbone.bottom_up.res4.2.conv3.* | backbone.bottom_up.res4.2.conv3.{norm.bias,norm.running_mean,norm.running_var,norm.weight,weight} | (1024,) (1024,) (1024,) (1024,) (1024,256,1,1) |
| backbone.bottom_up.res4.3.conv1.* | backbone.bottom_up.res4.3.conv1.{norm.bias,norm.running_mean,norm.running_var,norm.weight,weight} | (256,) (256,) (256,) (256,) (256,1024,1,1) |
| backbone.bottom_up.res4.3.conv2.* | backbone.bottom_up.res4.3.conv2.{norm.bias,norm.running_mean,norm.running_var,norm.weight,weight} | (256,) (256,) (256,) (256,) (256,256,3,3) |
| backbone.bottom_up.res4.3.conv3.* | backbone.bottom_up.res4.3.conv3.{norm.bias,norm.running_mean,norm.running_var,norm.weight,weight} | (1024,) (1024,) (1024,) (1024,) (1024,256,1,1) |
| backbone.bottom_up.res4.4.conv1.* | backbone.bottom_up.res4.4.conv1.{norm.bias,norm.running_mean,norm.running_var,norm.weight,weight} | (256,) (256,) (256,) (256,) (256,1024,1,1) |
| backbone.bottom_up.res4.4.conv2.* | backbone.bottom_up.res4.4.conv2.{norm.bias,norm.running_mean,norm.running_var,norm.weight,weight} | (256,) (256,) (256,) (256,) (256,256,3,3) |
| backbone.bottom_up.res4.4.conv3.* | backbone.bottom_up.res4.4.conv3.{norm.bias,norm.running_mean,norm.running_var,norm.weight,weight} | (1024,) (1024,) (1024,) (1024,) (1024,256,1,1) |
| backbone.bottom_up.res4.5.conv1.* | backbone.bottom_up.res4.5.conv1.{norm.bias,norm.running_mean,norm.running_var,norm.weight,weight} | (256,) (256,) (256,) (256,) (256,1024,1,1) |
| backbone.bottom_up.res4.5.conv2.* | backbone.bottom_up.res4.5.conv2.{norm.bias,norm.running_mean,norm.running_var,norm.weight,weight} | (256,) (256,) (256,) (256,) (256,256,3,3) |
| backbone.bottom_up.res4.5.conv3.* | backbone.bottom_up.res4.5.conv3.{norm.bias,norm.running_mean,norm.running_var,norm.weight,weight} | (1024,) (1024,) (1024,) (1024,) (1024,256,1,1) |
| backbone.bottom_up.res5.0.conv1.* | backbone.bottom_up.res5.0.conv1.{norm.bias,norm.running_mean,norm.running_var,norm.weight,weight} | (512,) (512,) (512,) (512,) (512,1024,1,1) |
| backbone.bottom_up.res5.0.conv2.* | backbone.bottom_up.res5.0.conv2.{norm.bias,norm.running_mean,norm.running_var,norm.weight,weight} | (512,) (512,) (512,) (512,) (512,512,3,3) |
| backbone.bottom_up.res5.0.conv3.* | backbone.bottom_up.res5.0.conv3.{norm.bias,norm.running_mean,norm.running_var,norm.weight,weight} | (2048,) (2048,) (2048,) (2048,) (2048,512,1,1) |
| backbone.bottom_up.res5.0.shortcut.* | backbone.bottom_up.res5.0.shortcut.{norm.bias,norm.running_mean,norm.running_var,norm.weight,weight} | (2048,) (2048,) (2048,) (2048,) (2048,1024,1,1) |
| backbone.bottom_up.res5.1.conv1.* | backbone.bottom_up.res5.1.conv1.{norm.bias,norm.running_mean,norm.running_var,norm.weight,weight} | (512,) (512,) (512,) (512,) (512,2048,1,1) |
| backbone.bottom_up.res5.1.conv2.* | backbone.bottom_up.res5.1.conv2.{norm.bias,norm.running_mean,norm.running_var,norm.weight,weight} | (512,) (512,) (512,) (512,) (512,512,3,3) |
| backbone.bottom_up.res5.1.conv3.* | backbone.bottom_up.res5.1.conv3.{norm.bias,norm.running_mean,norm.running_var,norm.weight,weight} | (2048,) (2048,) (2048,) (2048,) (2048,512,1,1) |
| backbone.bottom_up.res5.2.conv1.* | backbone.bottom_up.res5.2.conv1.{norm.bias,norm.running_mean,norm.running_var,norm.weight,weight} | (512,) (512,) (512,) (512,) (512,2048,1,1) |
| backbone.bottom_up.res5.2.conv2.* | backbone.bottom_up.res5.2.conv2.{norm.bias,norm.running_mean,norm.running_var,norm.weight,weight} | (512,) (512,) (512,) (512,) (512,512,3,3) |
| backbone.bottom_up.res5.2.conv3.* | backbone.bottom_up.res5.2.conv3.{norm.bias,norm.running_mean,norm.running_var,norm.weight,weight} | (2048,) (2048,) (2048,) (2048,) (2048,512,1,1) |
| backbone.bottom_up.stem.conv1.* | backbone.bottom_up.stem.conv1.{norm.bias,norm.running_mean,norm.running_var,norm.weight,weight} | (64,) (64,) (64,) (64,) (64,3,7,7) |
| backbone.fpn_lateral2.* | backbone.fpn_lateral2.{bias,weight} | (256,) (256,256,1,1) |
| backbone.fpn_lateral3.* | backbone.fpn_lateral3.{bias,weight} | (256,) (256,512,1,1) |
| backbone.fpn_lateral4.* | backbone.fpn_lateral4.{bias,weight} | (256,) (256,1024,1,1) |
| backbone.fpn_lateral5.* | backbone.fpn_lateral5.{bias,weight} | (256,) (256,2048,1,1) |
| backbone.fpn_output2.* | backbone.fpn_output2.{bias,weight} | (256,) (256,256,3,3) |
| backbone.fpn_output3.* | backbone.fpn_output3.{bias,weight} | (256,) (256,256,3,3) |
| backbone.fpn_output4.* | backbone.fpn_output4.{bias,weight} | (256,) (256,256,3,3) |
| backbone.fpn_output5.* | backbone.fpn_output5.{bias,weight} | (256,) (256,256,3,3) |
| proposal_generator.rpn_head.anchor_deltas.* | proposal_generator.rpn_head.anchor_deltas.{bias,weight} | (12,) (12,256,1,1) |
| proposal_generator.rpn_head.conv.* | proposal_generator.rpn_head.conv.{bias,weight} | (256,) (256,256,3,3) |
| proposal_generator.rpn_head.objectness_logits.* | proposal_generator.rpn_head.objectness_logits.{bias,weight} | (3,) (3,256,1,1) |
| roi_heads.box_head.fc1.* | roi_heads.box_head.fc1.{bias,weight} | (1024,) (1024,12544) |
| roi_heads.box_head.fc2.* | roi_heads.box_head.fc2.{bias,weight} | (1024,) (1024,1024) |
| roi_heads.box_predictor.bbox_pred.* | roi_heads.box_predictor.bbox_pred.{bias,weight} | (28,) (28,1024) |
| roi_heads.box_predictor.cls_score.* | roi_heads.box_predictor.cls_score.{bias,weight} | (8,) (8,1024) |
from detectron2.evaluation import COCOEvaluator, inference_on_dataset
from detectron2.data import build_detection_test_loader
from detectron2.engine import DefaultTrainer

# Evaluate the trained model on the validation split using COCO-style metrics.
# Use the explicit-argument COCOEvaluator constructor: the old positional
# (name, cfg, distributed) form is deprecated and emits a warning (visible in
# this notebook's own output). Predictions/results are written to output_dir.
evaluator = COCOEvaluator("rel_actions_val3", output_dir="./output_evals/")
val_loader = build_detection_test_loader(cfg, "rel_actions_val3")
# Runs the model over every batch in val_loader and returns an OrderedDict
# of bbox AP metrics (per-category and aggregate).
inference_on_dataset(trainer.model, val_loader, evaluator)
WARNING [05/09 00:53:28 d2.evaluation.coco_evaluation]: COCO Evaluator instantiated using config, this is deprecated behavior. Please pass in explicit arguments instead. [05/09 00:53:28 d2.data.datasets.coco]: Loaded 2215 images in COCO format from ./data/instances_vcoco_actions_val_2014.json [05/09 00:53:29 d2.data.dataset_mapper]: [DatasetMapper] Augmentations used in inference: [ResizeShortestEdge(short_edge_length=(800, 800), max_size=1333, sample_style='choice')] [05/09 00:53:29 d2.data.common]: Serializing 2215 elements to byte tensors and concatenating them all ... [05/09 00:53:29 d2.data.common]: Serialized dataset takes 5.66 MiB [05/09 00:53:29 d2.evaluation.evaluator]: Start inference on 2215 batches [05/09 00:53:30 d2.evaluation.evaluator]: Inference done 11/2215. Dataloading: 0.0016 s/iter. Inference: 0.0630 s/iter. Eval: 0.0003 s/iter. Total: 0.0649 s/iter. ETA=0:02:22 [05/09 00:53:35 d2.evaluation.evaluator]: Inference done 84/2215. Dataloading: 0.0022 s/iter. Inference: 0.0662 s/iter. Eval: 0.0003 s/iter. Total: 0.0688 s/iter. ETA=0:02:26 [05/09 00:53:40 d2.evaluation.evaluator]: Inference done 159/2215. Dataloading: 0.0023 s/iter. Inference: 0.0654 s/iter. Eval: 0.0003 s/iter. Total: 0.0681 s/iter. ETA=0:02:19 [05/09 00:53:45 d2.evaluation.evaluator]: Inference done 232/2215. Dataloading: 0.0023 s/iter. Inference: 0.0656 s/iter. Eval: 0.0003 s/iter. Total: 0.0683 s/iter. ETA=0:02:15 [05/09 00:53:50 d2.evaluation.evaluator]: Inference done 305/2215. Dataloading: 0.0023 s/iter. Inference: 0.0658 s/iter. Eval: 0.0003 s/iter. Total: 0.0685 s/iter. ETA=0:02:10 [05/09 00:53:55 d2.evaluation.evaluator]: Inference done 378/2215. Dataloading: 0.0023 s/iter. Inference: 0.0659 s/iter. Eval: 0.0003 s/iter. Total: 0.0686 s/iter. ETA=0:02:05 [05/09 00:54:00 d2.evaluation.evaluator]: Inference done 452/2215. Dataloading: 0.0023 s/iter. Inference: 0.0659 s/iter. Eval: 0.0003 s/iter. Total: 0.0685 s/iter. 
ETA=0:02:00 [05/09 00:54:05 d2.evaluation.evaluator]: Inference done 526/2215. Dataloading: 0.0023 s/iter. Inference: 0.0658 s/iter. Eval: 0.0003 s/iter. Total: 0.0684 s/iter. ETA=0:01:55 [05/09 00:54:10 d2.evaluation.evaluator]: Inference done 599/2215. Dataloading: 0.0023 s/iter. Inference: 0.0658 s/iter. Eval: 0.0003 s/iter. Total: 0.0685 s/iter. ETA=0:01:50 [05/09 00:54:15 d2.evaluation.evaluator]: Inference done 671/2215. Dataloading: 0.0023 s/iter. Inference: 0.0659 s/iter. Eval: 0.0003 s/iter. Total: 0.0686 s/iter. ETA=0:01:45 [05/09 00:54:20 d2.evaluation.evaluator]: Inference done 743/2215. Dataloading: 0.0023 s/iter. Inference: 0.0660 s/iter. Eval: 0.0003 s/iter. Total: 0.0687 s/iter. ETA=0:01:41 [05/09 00:54:25 d2.evaluation.evaluator]: Inference done 815/2215. Dataloading: 0.0023 s/iter. Inference: 0.0661 s/iter. Eval: 0.0003 s/iter. Total: 0.0688 s/iter. ETA=0:01:36 [05/09 00:54:30 d2.evaluation.evaluator]: Inference done 888/2215. Dataloading: 0.0023 s/iter. Inference: 0.0662 s/iter. Eval: 0.0003 s/iter. Total: 0.0688 s/iter. ETA=0:01:31 [05/09 00:54:35 d2.evaluation.evaluator]: Inference done 959/2215. Dataloading: 0.0023 s/iter. Inference: 0.0663 s/iter. Eval: 0.0003 s/iter. Total: 0.0690 s/iter. ETA=0:01:26 [05/09 00:54:40 d2.evaluation.evaluator]: Inference done 1032/2215. Dataloading: 0.0023 s/iter. Inference: 0.0663 s/iter. Eval: 0.0003 s/iter. Total: 0.0690 s/iter. ETA=0:01:21 [05/09 00:54:45 d2.evaluation.evaluator]: Inference done 1105/2215. Dataloading: 0.0023 s/iter. Inference: 0.0663 s/iter. Eval: 0.0003 s/iter. Total: 0.0690 s/iter. ETA=0:01:16 [05/09 00:54:50 d2.evaluation.evaluator]: Inference done 1178/2215. Dataloading: 0.0023 s/iter. Inference: 0.0663 s/iter. Eval: 0.0003 s/iter. Total: 0.0690 s/iter. ETA=0:01:11 [05/09 00:54:55 d2.evaluation.evaluator]: Inference done 1250/2215. Dataloading: 0.0023 s/iter. Inference: 0.0663 s/iter. Eval: 0.0003 s/iter. Total: 0.0690 s/iter. 
ETA=0:01:06 [05/09 00:55:00 d2.evaluation.evaluator]: Inference done 1322/2215. Dataloading: 0.0023 s/iter. Inference: 0.0664 s/iter. Eval: 0.0003 s/iter. Total: 0.0691 s/iter. ETA=0:01:01 [05/09 00:55:05 d2.evaluation.evaluator]: Inference done 1395/2215. Dataloading: 0.0023 s/iter. Inference: 0.0663 s/iter. Eval: 0.0003 s/iter. Total: 0.0690 s/iter. ETA=0:00:56 [05/09 00:55:10 d2.evaluation.evaluator]: Inference done 1468/2215. Dataloading: 0.0023 s/iter. Inference: 0.0663 s/iter. Eval: 0.0003 s/iter. Total: 0.0690 s/iter. ETA=0:00:51 [05/09 00:55:15 d2.evaluation.evaluator]: Inference done 1541/2215. Dataloading: 0.0023 s/iter. Inference: 0.0663 s/iter. Eval: 0.0003 s/iter. Total: 0.0691 s/iter. ETA=0:00:46 [05/09 00:55:21 d2.evaluation.evaluator]: Inference done 1613/2215. Dataloading: 0.0023 s/iter. Inference: 0.0664 s/iter. Eval: 0.0003 s/iter. Total: 0.0691 s/iter. ETA=0:00:41 [05/09 00:55:26 d2.evaluation.evaluator]: Inference done 1686/2215. Dataloading: 0.0023 s/iter. Inference: 0.0664 s/iter. Eval: 0.0003 s/iter. Total: 0.0691 s/iter. ETA=0:00:36 [05/09 00:55:31 d2.evaluation.evaluator]: Inference done 1759/2215. Dataloading: 0.0023 s/iter. Inference: 0.0664 s/iter. Eval: 0.0003 s/iter. Total: 0.0691 s/iter. ETA=0:00:31 [05/09 00:55:36 d2.evaluation.evaluator]: Inference done 1833/2215. Dataloading: 0.0023 s/iter. Inference: 0.0663 s/iter. Eval: 0.0003 s/iter. Total: 0.0691 s/iter. ETA=0:00:26 [05/09 00:55:41 d2.evaluation.evaluator]: Inference done 1907/2215. Dataloading: 0.0023 s/iter. Inference: 0.0663 s/iter. Eval: 0.0003 s/iter. Total: 0.0690 s/iter. ETA=0:00:21 [05/09 00:55:46 d2.evaluation.evaluator]: Inference done 1979/2215. Dataloading: 0.0023 s/iter. Inference: 0.0663 s/iter. Eval: 0.0003 s/iter. Total: 0.0690 s/iter. ETA=0:00:16 [05/09 00:55:51 d2.evaluation.evaluator]: Inference done 2053/2215. Dataloading: 0.0023 s/iter. Inference: 0.0663 s/iter. Eval: 0.0003 s/iter. Total: 0.0690 s/iter. 
ETA=0:00:11 [05/09 00:55:56 d2.evaluation.evaluator]: Inference done 2126/2215. Dataloading: 0.0023 s/iter. Inference: 0.0663 s/iter. Eval: 0.0003 s/iter. Total: 0.0690 s/iter. ETA=0:00:06 [05/09 00:56:01 d2.evaluation.evaluator]: Inference done 2199/2215. Dataloading: 0.0024 s/iter. Inference: 0.0663 s/iter. Eval: 0.0003 s/iter. Total: 0.0690 s/iter. ETA=0:00:01 [05/09 00:56:02 d2.evaluation.evaluator]: Total inference time: 0:02:32.564105 (0.069034 s / iter per device, on 1 devices) [05/09 00:56:02 d2.evaluation.evaluator]: Total inference pure compute time: 0:02:26 (0.066264 s / iter per device, on 1 devices) [05/09 00:56:02 d2.evaluation.coco_evaluation]: Preparing results for COCO format ... [05/09 00:56:02 d2.evaluation.coco_evaluation]: Saving results to ./output_evals/coco_instances_results.json [05/09 00:56:02 d2.evaluation.coco_evaluation]: Evaluating predictions with unofficial COCO API... Loading and preparing results... DONE (t=0.05s) creating index... index created! [05/09 00:56:02 d2.evaluation.fast_eval_api]: Evaluate annotation type *bbox* [05/09 00:56:03 d2.evaluation.fast_eval_api]: COCOeval_opt.evaluate() finished in 0.72 seconds. [05/09 00:56:03 d2.evaluation.fast_eval_api]: Accumulating evaluation results... [05/09 00:56:03 d2.evaluation.fast_eval_api]: COCOeval_opt.accumulate() finished in 0.15 seconds. 
Average Precision (AP) @[ IoU=0.50:0.95 | area= all | maxDets=100 ] = 0.108 Average Precision (AP) @[ IoU=0.50 | area= all | maxDets=100 ] = 0.170 Average Precision (AP) @[ IoU=0.75 | area= all | maxDets=100 ] = 0.124 Average Precision (AP) @[ IoU=0.50:0.95 | area= small | maxDets=100 ] = 0.500 Average Precision (AP) @[ IoU=0.50:0.95 | area=medium | maxDets=100 ] = 0.152 Average Precision (AP) @[ IoU=0.50:0.95 | area= large | maxDets=100 ] = 0.114 Average Recall (AR) @[ IoU=0.50:0.95 | area= all | maxDets= 1 ] = 0.115 Average Recall (AR) @[ IoU=0.50:0.95 | area= all | maxDets= 10 ] = 0.206 Average Recall (AR) @[ IoU=0.50:0.95 | area= all | maxDets=100 ] = 0.206 Average Recall (AR) @[ IoU=0.50:0.95 | area= small | maxDets=100 ] = 0.500 Average Recall (AR) @[ IoU=0.50:0.95 | area=medium | maxDets=100 ] = 0.235 Average Recall (AR) @[ IoU=0.50:0.95 | area= large | maxDets=100 ] = 0.207 [05/09 00:56:03 d2.evaluation.coco_evaluation]: Evaluation results for bbox: | AP | AP50 | AP75 | APs | APm | APl | |:------:|:------:|:------:|:------:|:------:|:------:| | 10.818 | 16.965 | 12.392 | 50.000 | 15.232 | 11.430 | [05/09 00:56:03 d2.evaluation.coco_evaluation]: Per-category bbox AP: | category | AP | category | AP | category | AP | |:--------------|:-------|:-----------|:------|:-----------|:-------| | hold | 39.873 | carry | 0.000 | point | 0.000 | | eat | 0.423 | drink | 0.000 | stand | 35.430 | | talk_on_phone | 0.000 | | | | |
OrderedDict([('bbox',
{'AP': 10.818063676259921,
'AP-carry': 0.0,
'AP-drink': 0.0,
'AP-eat': 0.4225658325436689,
'AP-hold': 39.87349445213421,
'AP-point': 0.0,
'AP-stand': 35.43038544914159,
'AP-talk_on_phone': 0.0,
'AP50': 16.964607948315617,
'AP75': 12.391597443236359,
'APl': 11.429539806129627,
'APm': 15.231991371583723,
'APs': 50.0})])
from detectron2.engine import DefaultTrainer
from detectron2.config import get_cfg
import os
from detectron2 import model_zoo

# Fine-tune a COCO-pretrained RetinaNet-R101-FPN on the action-labelled
# V-COCO training set ("rel_actions1").
cfg = get_cfg()
cfg.merge_from_file(model_zoo.get_config_file("COCO-Detection/retinanet_R_101_FPN_3x.yaml"))
cfg.DATASETS.TRAIN = ("rel_actions1",)
cfg.DATASETS.TEST = ()  # no metrics implemented for this dataset
cfg.DATALOADER.NUM_WORKERS = 2
# Initialize from the model-zoo checkpoint. The classification head is
# re-initialized automatically because NUM_CLASSES below differs from
# COCO's 80 (detectron2 skips the incompatible head weights on load).
cfg.MODEL.WEIGHTS = model_zoo.get_checkpoint_url("COCO-Detection/retinanet_R_101_FPN_3x.yaml")
cfg.SOLVER.IMS_PER_BATCH = 2
cfg.SOLVER.BASE_LR = 0.002
cfg.SOLVER.MAX_ITER = 1000
# NOTE(review): RetinaNet is a one-stage detector with no ROI heads, so the
# MODEL.ROI_HEADS.* settings from the earlier two-stage experiment are
# ignored by this model; the dead BATCH_SIZE_PER_IMAGE line was removed.
# The class count for RetinaNet is configured here instead:
cfg.MODEL.RETINANET.NUM_CLASSES = len(relevant_classes)
cfg.OUTPUT_DIR = "./outputs/action_only_detector"
os.makedirs(cfg.OUTPUT_DIR, exist_ok=True)

trainer = DefaultTrainer(cfg)
trainer.resume_or_load(resume=False)
trainer.train()
Loading config /content/drive/.shortcut-targets-by-id/1Eg2dTJ9lloHImzZiTfOZ7_jOP1sTh--c/6.869 Project/v-coco/detectron2_repo/detectron2/model_zoo/configs/COCO-Detection/../Base-RetinaNet.yaml with yaml.unsafe_load. Your machine may be at risk if the file contains malicious content.
[05/09 01:02:54 d2.engine.defaults]: Model: RetinaNet( (backbone): FPN( (fpn_lateral3): Conv2d(512, 256, kernel_size=(1, 1), stride=(1, 1)) (fpn_output3): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)) (fpn_lateral4): Conv2d(1024, 256, kernel_size=(1, 1), stride=(1, 1)) (fpn_output4): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)) (fpn_lateral5): Conv2d(2048, 256, kernel_size=(1, 1), stride=(1, 1)) (fpn_output5): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)) (top_block): LastLevelP6P7( (p6): Conv2d(2048, 256, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1)) (p7): Conv2d(256, 256, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1)) ) (bottom_up): ResNet( (stem): BasicStem( (conv1): Conv2d( 3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False (norm): FrozenBatchNorm2d(num_features=64, eps=1e-05) ) ) (res2): Sequential( (0): BottleneckBlock( (shortcut): Conv2d( 64, 256, kernel_size=(1, 1), stride=(1, 1), bias=False (norm): FrozenBatchNorm2d(num_features=256, eps=1e-05) ) (conv1): Conv2d( 64, 64, kernel_size=(1, 1), stride=(1, 1), bias=False (norm): FrozenBatchNorm2d(num_features=64, eps=1e-05) ) (conv2): Conv2d( 64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False (norm): FrozenBatchNorm2d(num_features=64, eps=1e-05) ) (conv3): Conv2d( 64, 256, kernel_size=(1, 1), stride=(1, 1), bias=False (norm): FrozenBatchNorm2d(num_features=256, eps=1e-05) ) ) (1): BottleneckBlock( (conv1): Conv2d( 256, 64, kernel_size=(1, 1), stride=(1, 1), bias=False (norm): FrozenBatchNorm2d(num_features=64, eps=1e-05) ) (conv2): Conv2d( 64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False (norm): FrozenBatchNorm2d(num_features=64, eps=1e-05) ) (conv3): Conv2d( 64, 256, kernel_size=(1, 1), stride=(1, 1), bias=False (norm): FrozenBatchNorm2d(num_features=256, eps=1e-05) ) ) (2): BottleneckBlock( (conv1): Conv2d( 256, 64, kernel_size=(1, 1), stride=(1, 1), bias=False (norm): 
FrozenBatchNorm2d(num_features=64, eps=1e-05) ) (conv2): Conv2d( 64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False (norm): FrozenBatchNorm2d(num_features=64, eps=1e-05) ) (conv3): Conv2d( 64, 256, kernel_size=(1, 1), stride=(1, 1), bias=False (norm): FrozenBatchNorm2d(num_features=256, eps=1e-05) ) ) ) (res3): Sequential( (0): BottleneckBlock( (shortcut): Conv2d( 256, 512, kernel_size=(1, 1), stride=(2, 2), bias=False (norm): FrozenBatchNorm2d(num_features=512, eps=1e-05) ) (conv1): Conv2d( 256, 128, kernel_size=(1, 1), stride=(2, 2), bias=False (norm): FrozenBatchNorm2d(num_features=128, eps=1e-05) ) (conv2): Conv2d( 128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False (norm): FrozenBatchNorm2d(num_features=128, eps=1e-05) ) (conv3): Conv2d( 128, 512, kernel_size=(1, 1), stride=(1, 1), bias=False (norm): FrozenBatchNorm2d(num_features=512, eps=1e-05) ) ) (1): BottleneckBlock( (conv1): Conv2d( 512, 128, kernel_size=(1, 1), stride=(1, 1), bias=False (norm): FrozenBatchNorm2d(num_features=128, eps=1e-05) ) (conv2): Conv2d( 128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False (norm): FrozenBatchNorm2d(num_features=128, eps=1e-05) ) (conv3): Conv2d( 128, 512, kernel_size=(1, 1), stride=(1, 1), bias=False (norm): FrozenBatchNorm2d(num_features=512, eps=1e-05) ) ) (2): BottleneckBlock( (conv1): Conv2d( 512, 128, kernel_size=(1, 1), stride=(1, 1), bias=False (norm): FrozenBatchNorm2d(num_features=128, eps=1e-05) ) (conv2): Conv2d( 128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False (norm): FrozenBatchNorm2d(num_features=128, eps=1e-05) ) (conv3): Conv2d( 128, 512, kernel_size=(1, 1), stride=(1, 1), bias=False (norm): FrozenBatchNorm2d(num_features=512, eps=1e-05) ) ) (3): BottleneckBlock( (conv1): Conv2d( 512, 128, kernel_size=(1, 1), stride=(1, 1), bias=False (norm): FrozenBatchNorm2d(num_features=128, eps=1e-05) ) (conv2): Conv2d( 128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), 
bias=False (norm): FrozenBatchNorm2d(num_features=128, eps=1e-05) ) (conv3): Conv2d( 128, 512, kernel_size=(1, 1), stride=(1, 1), bias=False (norm): FrozenBatchNorm2d(num_features=512, eps=1e-05) ) ) ) (res4): Sequential( (0): BottleneckBlock( (shortcut): Conv2d( 512, 1024, kernel_size=(1, 1), stride=(2, 2), bias=False (norm): FrozenBatchNorm2d(num_features=1024, eps=1e-05) ) (conv1): Conv2d( 512, 256, kernel_size=(1, 1), stride=(2, 2), bias=False (norm): FrozenBatchNorm2d(num_features=256, eps=1e-05) ) (conv2): Conv2d( 256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False (norm): FrozenBatchNorm2d(num_features=256, eps=1e-05) ) (conv3): Conv2d( 256, 1024, kernel_size=(1, 1), stride=(1, 1), bias=False (norm): FrozenBatchNorm2d(num_features=1024, eps=1e-05) ) ) (1): BottleneckBlock( (conv1): Conv2d( 1024, 256, kernel_size=(1, 1), stride=(1, 1), bias=False (norm): FrozenBatchNorm2d(num_features=256, eps=1e-05) ) (conv2): Conv2d( 256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False (norm): FrozenBatchNorm2d(num_features=256, eps=1e-05) ) (conv3): Conv2d( 256, 1024, kernel_size=(1, 1), stride=(1, 1), bias=False (norm): FrozenBatchNorm2d(num_features=1024, eps=1e-05) ) ) (2): BottleneckBlock( (conv1): Conv2d( 1024, 256, kernel_size=(1, 1), stride=(1, 1), bias=False (norm): FrozenBatchNorm2d(num_features=256, eps=1e-05) ) (conv2): Conv2d( 256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False (norm): FrozenBatchNorm2d(num_features=256, eps=1e-05) ) (conv3): Conv2d( 256, 1024, kernel_size=(1, 1), stride=(1, 1), bias=False (norm): FrozenBatchNorm2d(num_features=1024, eps=1e-05) ) ) (3): BottleneckBlock( (conv1): Conv2d( 1024, 256, kernel_size=(1, 1), stride=(1, 1), bias=False (norm): FrozenBatchNorm2d(num_features=256, eps=1e-05) ) (conv2): Conv2d( 256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False (norm): FrozenBatchNorm2d(num_features=256, eps=1e-05) ) (conv3): Conv2d( 256, 1024, kernel_size=(1, 
1), stride=(1, 1), bias=False (norm): FrozenBatchNorm2d(num_features=1024, eps=1e-05) ) ) (4): BottleneckBlock( (conv1): Conv2d( 1024, 256, kernel_size=(1, 1), stride=(1, 1), bias=False (norm): FrozenBatchNorm2d(num_features=256, eps=1e-05) ) (conv2): Conv2d( 256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False (norm): FrozenBatchNorm2d(num_features=256, eps=1e-05) ) (conv3): Conv2d( 256, 1024, kernel_size=(1, 1), stride=(1, 1), bias=False (norm): FrozenBatchNorm2d(num_features=1024, eps=1e-05) ) ) (5): BottleneckBlock( (conv1): Conv2d( 1024, 256, kernel_size=(1, 1), stride=(1, 1), bias=False (norm): FrozenBatchNorm2d(num_features=256, eps=1e-05) ) (conv2): Conv2d( 256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False (norm): FrozenBatchNorm2d(num_features=256, eps=1e-05) ) (conv3): Conv2d( 256, 1024, kernel_size=(1, 1), stride=(1, 1), bias=False (norm): FrozenBatchNorm2d(num_features=1024, eps=1e-05) ) ) (6): BottleneckBlock( (conv1): Conv2d( 1024, 256, kernel_size=(1, 1), stride=(1, 1), bias=False (norm): FrozenBatchNorm2d(num_features=256, eps=1e-05) ) (conv2): Conv2d( 256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False (norm): FrozenBatchNorm2d(num_features=256, eps=1e-05) ) (conv3): Conv2d( 256, 1024, kernel_size=(1, 1), stride=(1, 1), bias=False (norm): FrozenBatchNorm2d(num_features=1024, eps=1e-05) ) ) (7): BottleneckBlock( (conv1): Conv2d( 1024, 256, kernel_size=(1, 1), stride=(1, 1), bias=False (norm): FrozenBatchNorm2d(num_features=256, eps=1e-05) ) (conv2): Conv2d( 256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False (norm): FrozenBatchNorm2d(num_features=256, eps=1e-05) ) (conv3): Conv2d( 256, 1024, kernel_size=(1, 1), stride=(1, 1), bias=False (norm): FrozenBatchNorm2d(num_features=1024, eps=1e-05) ) ) (8): BottleneckBlock( (conv1): Conv2d( 1024, 256, kernel_size=(1, 1), stride=(1, 1), bias=False (norm): FrozenBatchNorm2d(num_features=256, eps=1e-05) ) (conv2): Conv2d( 256, 
256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False (norm): FrozenBatchNorm2d(num_features=256, eps=1e-05) ) (conv3): Conv2d( 256, 1024, kernel_size=(1, 1), stride=(1, 1), bias=False (norm): FrozenBatchNorm2d(num_features=1024, eps=1e-05) ) ) (9): BottleneckBlock( (conv1): Conv2d( 1024, 256, kernel_size=(1, 1), stride=(1, 1), bias=False (norm): FrozenBatchNorm2d(num_features=256, eps=1e-05) ) (conv2): Conv2d( 256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False (norm): FrozenBatchNorm2d(num_features=256, eps=1e-05) ) (conv3): Conv2d( 256, 1024, kernel_size=(1, 1), stride=(1, 1), bias=False (norm): FrozenBatchNorm2d(num_features=1024, eps=1e-05) ) ) (10): BottleneckBlock( (conv1): Conv2d( 1024, 256, kernel_size=(1, 1), stride=(1, 1), bias=False (norm): FrozenBatchNorm2d(num_features=256, eps=1e-05) ) (conv2): Conv2d( 256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False (norm): FrozenBatchNorm2d(num_features=256, eps=1e-05) ) (conv3): Conv2d( 256, 1024, kernel_size=(1, 1), stride=(1, 1), bias=False (norm): FrozenBatchNorm2d(num_features=1024, eps=1e-05) ) ) (11): BottleneckBlock( (conv1): Conv2d( 1024, 256, kernel_size=(1, 1), stride=(1, 1), bias=False (norm): FrozenBatchNorm2d(num_features=256, eps=1e-05) ) (conv2): Conv2d( 256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False (norm): FrozenBatchNorm2d(num_features=256, eps=1e-05) ) (conv3): Conv2d( 256, 1024, kernel_size=(1, 1), stride=(1, 1), bias=False (norm): FrozenBatchNorm2d(num_features=1024, eps=1e-05) ) ) (12): BottleneckBlock( (conv1): Conv2d( 1024, 256, kernel_size=(1, 1), stride=(1, 1), bias=False (norm): FrozenBatchNorm2d(num_features=256, eps=1e-05) ) (conv2): Conv2d( 256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False (norm): FrozenBatchNorm2d(num_features=256, eps=1e-05) ) (conv3): Conv2d( 256, 1024, kernel_size=(1, 1), stride=(1, 1), bias=False (norm): FrozenBatchNorm2d(num_features=1024, eps=1e-05) ) ) (13): 
BottleneckBlock( (conv1): Conv2d( 1024, 256, kernel_size=(1, 1), stride=(1, 1), bias=False (norm): FrozenBatchNorm2d(num_features=256, eps=1e-05) ) (conv2): Conv2d( 256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False (norm): FrozenBatchNorm2d(num_features=256, eps=1e-05) ) (conv3): Conv2d( 256, 1024, kernel_size=(1, 1), stride=(1, 1), bias=False (norm): FrozenBatchNorm2d(num_features=1024, eps=1e-05) ) ) (14): BottleneckBlock( (conv1): Conv2d( 1024, 256, kernel_size=(1, 1), stride=(1, 1), bias=False (norm): FrozenBatchNorm2d(num_features=256, eps=1e-05) ) (conv2): Conv2d( 256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False (norm): FrozenBatchNorm2d(num_features=256, eps=1e-05) ) (conv3): Conv2d( 256, 1024, kernel_size=(1, 1), stride=(1, 1), bias=False (norm): FrozenBatchNorm2d(num_features=1024, eps=1e-05) ) ) (15): BottleneckBlock( (conv1): Conv2d( 1024, 256, kernel_size=(1, 1), stride=(1, 1), bias=False (norm): FrozenBatchNorm2d(num_features=256, eps=1e-05) ) (conv2): Conv2d( 256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False (norm): FrozenBatchNorm2d(num_features=256, eps=1e-05) ) (conv3): Conv2d( 256, 1024, kernel_size=(1, 1), stride=(1, 1), bias=False (norm): FrozenBatchNorm2d(num_features=1024, eps=1e-05) ) ) (16): BottleneckBlock( (conv1): Conv2d( 1024, 256, kernel_size=(1, 1), stride=(1, 1), bias=False (norm): FrozenBatchNorm2d(num_features=256, eps=1e-05) ) (conv2): Conv2d( 256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False (norm): FrozenBatchNorm2d(num_features=256, eps=1e-05) ) (conv3): Conv2d( 256, 1024, kernel_size=(1, 1), stride=(1, 1), bias=False (norm): FrozenBatchNorm2d(num_features=1024, eps=1e-05) ) ) (17): BottleneckBlock( (conv1): Conv2d( 1024, 256, kernel_size=(1, 1), stride=(1, 1), bias=False (norm): FrozenBatchNorm2d(num_features=256, eps=1e-05) ) (conv2): Conv2d( 256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False (norm): 
FrozenBatchNorm2d(num_features=256, eps=1e-05) ) (conv3): Conv2d( 256, 1024, kernel_size=(1, 1), stride=(1, 1), bias=False (norm): FrozenBatchNorm2d(num_features=1024, eps=1e-05) ) ) (18): BottleneckBlock( (conv1): Conv2d( 1024, 256, kernel_size=(1, 1), stride=(1, 1), bias=False (norm): FrozenBatchNorm2d(num_features=256, eps=1e-05) ) (conv2): Conv2d( 256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False (norm): FrozenBatchNorm2d(num_features=256, eps=1e-05) ) (conv3): Conv2d( 256, 1024, kernel_size=(1, 1), stride=(1, 1), bias=False (norm): FrozenBatchNorm2d(num_features=1024, eps=1e-05) ) ) (19): BottleneckBlock( (conv1): Conv2d( 1024, 256, kernel_size=(1, 1), stride=(1, 1), bias=False (norm): FrozenBatchNorm2d(num_features=256, eps=1e-05) ) (conv2): Conv2d( 256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False (norm): FrozenBatchNorm2d(num_features=256, eps=1e-05) ) (conv3): Conv2d( 256, 1024, kernel_size=(1, 1), stride=(1, 1), bias=False (norm): FrozenBatchNorm2d(num_features=1024, eps=1e-05) ) ) (20): BottleneckBlock( (conv1): Conv2d( 1024, 256, kernel_size=(1, 1), stride=(1, 1), bias=False (norm): FrozenBatchNorm2d(num_features=256, eps=1e-05) ) (conv2): Conv2d( 256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False (norm): FrozenBatchNorm2d(num_features=256, eps=1e-05) ) (conv3): Conv2d( 256, 1024, kernel_size=(1, 1), stride=(1, 1), bias=False (norm): FrozenBatchNorm2d(num_features=1024, eps=1e-05) ) ) (21): BottleneckBlock( (conv1): Conv2d( 1024, 256, kernel_size=(1, 1), stride=(1, 1), bias=False (norm): FrozenBatchNorm2d(num_features=256, eps=1e-05) ) (conv2): Conv2d( 256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False (norm): FrozenBatchNorm2d(num_features=256, eps=1e-05) ) (conv3): Conv2d( 256, 1024, kernel_size=(1, 1), stride=(1, 1), bias=False (norm): FrozenBatchNorm2d(num_features=1024, eps=1e-05) ) ) (22): BottleneckBlock( (conv1): Conv2d( 1024, 256, kernel_size=(1, 1), 
stride=(1, 1), bias=False (norm): FrozenBatchNorm2d(num_features=256, eps=1e-05) ) (conv2): Conv2d( 256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False (norm): FrozenBatchNorm2d(num_features=256, eps=1e-05) ) (conv3): Conv2d( 256, 1024, kernel_size=(1, 1), stride=(1, 1), bias=False (norm): FrozenBatchNorm2d(num_features=1024, eps=1e-05) ) ) ) (res5): Sequential( (0): BottleneckBlock( (shortcut): Conv2d( 1024, 2048, kernel_size=(1, 1), stride=(2, 2), bias=False (norm): FrozenBatchNorm2d(num_features=2048, eps=1e-05) ) (conv1): Conv2d( 1024, 512, kernel_size=(1, 1), stride=(2, 2), bias=False (norm): FrozenBatchNorm2d(num_features=512, eps=1e-05) ) (conv2): Conv2d( 512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False (norm): FrozenBatchNorm2d(num_features=512, eps=1e-05) ) (conv3): Conv2d( 512, 2048, kernel_size=(1, 1), stride=(1, 1), bias=False (norm): FrozenBatchNorm2d(num_features=2048, eps=1e-05) ) ) (1): BottleneckBlock( (conv1): Conv2d( 2048, 512, kernel_size=(1, 1), stride=(1, 1), bias=False (norm): FrozenBatchNorm2d(num_features=512, eps=1e-05) ) (conv2): Conv2d( 512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False (norm): FrozenBatchNorm2d(num_features=512, eps=1e-05) ) (conv3): Conv2d( 512, 2048, kernel_size=(1, 1), stride=(1, 1), bias=False (norm): FrozenBatchNorm2d(num_features=2048, eps=1e-05) ) ) (2): BottleneckBlock( (conv1): Conv2d( 2048, 512, kernel_size=(1, 1), stride=(1, 1), bias=False (norm): FrozenBatchNorm2d(num_features=512, eps=1e-05) ) (conv2): Conv2d( 512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False (norm): FrozenBatchNorm2d(num_features=512, eps=1e-05) ) (conv3): Conv2d( 512, 2048, kernel_size=(1, 1), stride=(1, 1), bias=False (norm): FrozenBatchNorm2d(num_features=2048, eps=1e-05) ) ) ) ) ) (head): RetinaNetHead( (cls_subnet): Sequential( (0): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)) (1): ReLU() (2): Conv2d(256, 256, kernel_size=(3, 
3), stride=(1, 1), padding=(1, 1)) (3): ReLU() (4): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)) (5): ReLU() (6): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)) (7): ReLU() ) (bbox_subnet): Sequential( (0): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)) (1): ReLU() (2): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)) (3): ReLU() (4): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)) (5): ReLU() (6): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)) (7): ReLU() ) (cls_score): Conv2d(256, 63, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)) (bbox_pred): Conv2d(256, 36, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)) ) (anchor_generator): DefaultAnchorGenerator( (cell_anchors): BufferList() ) ) [05/09 01:02:55 d2.data.datasets.coco]: Loaded 1951 images in COCO format from ./data/instances_vcoco_actions_train.json [05/09 01:02:55 d2.data.build]: Removed 0 images with no usable annotations. 1951 images left. [05/09 01:02:55 d2.data.dataset_mapper]: [DatasetMapper] Augmentations used in training: [ResizeShortestEdge(short_edge_length=(640, 672, 704, 736, 768, 800), max_size=1333, sample_style='choice'), RandomFlip()] [05/09 01:02:55 d2.data.build]: Using training sampler TrainingSampler [05/09 01:02:55 d2.data.common]: Serializing 1951 elements to byte tensors and concatenating them all ... [05/09 01:02:55 d2.data.common]: Serialized dataset takes 4.90 MiB WARNING [05/09 01:02:55 d2.solver.build]: SOLVER.STEPS contains values larger than SOLVER.MAX_ITER. These values will be ignored.
model_final_971ab9.pkl: 228MB [00:17, 12.8MB/s] Skip loading parameter 'head.cls_score.weight' to the model due to incompatible shapes: (720, 256, 3, 3) in the checkpoint but (63, 256, 3, 3) in the model! You might want to double check if this is expected. Skip loading parameter 'head.cls_score.bias' to the model due to incompatible shapes: (720,) in the checkpoint but (63,) in the model! You might want to double check if this is expected. Some model parameters or buffers are not found in the checkpoint: head.cls_score.{bias, weight} The checkpoint state_dict contains keys that are not used by the model: pixel_mean pixel_std
[05/09 01:03:13 d2.engine.train_loop]: Starting training from iteration 0 [05/09 01:03:27 d2.utils.events]: eta: 0:06:35 iter: 19 total_loss: 1.291 loss_cls: 1.149 loss_box_reg: 0.09464 time: 0.7037 data_time: 0.0223 lr: 3.9962e-05 max_mem: 3138M [05/09 01:03:34 d2.utils.events]: eta: 0:05:57 iter: 39 total_loss: 1.104 loss_cls: 0.9954 loss_box_reg: 0.0798 time: 0.5247 data_time: 0.0123 lr: 7.9922e-05 max_mem: 3138M [05/09 01:03:41 d2.utils.events]: eta: 0:05:41 iter: 59 total_loss: 0.9339 loss_cls: 0.8588 loss_box_reg: 0.07811 time: 0.4640 data_time: 0.0104 lr: 0.00011988 max_mem: 3138M [05/09 01:03:48 d2.utils.events]: eta: 0:05:34 iter: 79 total_loss: 0.8149 loss_cls: 0.7471 loss_box_reg: 0.08631 time: 0.4387 data_time: 0.0119 lr: 0.00015984 max_mem: 3138M [05/09 01:03:56 d2.utils.events]: eta: 0:05:27 iter: 99 total_loss: 0.5795 loss_cls: 0.4521 loss_box_reg: 0.1185 time: 0.4256 data_time: 0.0115 lr: 0.0001998 max_mem: 3138M [05/09 01:04:04 d2.utils.events]: eta: 0:05:20 iter: 119 total_loss: 0.4047 loss_cls: 0.3367 loss_box_reg: 0.0666 time: 0.4191 data_time: 0.0104 lr: 0.00023976 max_mem: 3194M [05/09 01:04:11 d2.utils.events]: eta: 0:05:13 iter: 139 total_loss: 0.4907 loss_cls: 0.3991 loss_box_reg: 0.08291 time: 0.4126 data_time: 0.0117 lr: 0.00027972 max_mem: 3194M [05/09 01:04:19 d2.utils.events]: eta: 0:05:05 iter: 159 total_loss: 0.4396 loss_cls: 0.3368 loss_box_reg: 0.09362 time: 0.4077 data_time: 0.0156 lr: 0.00031968 max_mem: 3194M [05/09 01:04:27 d2.utils.events]: eta: 0:04:59 iter: 179 total_loss: 0.407 loss_cls: 0.3104 loss_box_reg: 0.08639 time: 0.4065 data_time: 0.0112 lr: 0.00035964 max_mem: 3196M [05/09 01:04:35 d2.utils.events]: eta: 0:04:55 iter: 199 total_loss: 0.4064 loss_cls: 0.2811 loss_box_reg: 0.1042 time: 0.4073 data_time: 0.0196 lr: 0.0003996 max_mem: 3196M [05/09 01:04:42 d2.utils.events]: eta: 0:04:47 iter: 219 total_loss: 0.4155 loss_cls: 0.3076 loss_box_reg: 0.1106 time: 0.4038 data_time: 0.0110 lr: 0.00043956 max_mem: 3196M 
[05/09 01:04:50 d2.utils.events]: eta: 0:04:39 iter: 239 total_loss: 0.2786 loss_cls: 0.201 loss_box_reg: 0.07397 time: 0.4000 data_time: 0.0100 lr: 0.00047952 max_mem: 3196M [05/09 01:04:57 d2.utils.events]: eta: 0:04:32 iter: 259 total_loss: 0.3907 loss_cls: 0.2901 loss_box_reg: 0.07736 time: 0.3987 data_time: 0.0121 lr: 0.00051948 max_mem: 3196M [05/09 01:05:05 d2.utils.events]: eta: 0:04:25 iter: 279 total_loss: 0.3119 loss_cls: 0.2405 loss_box_reg: 0.07196 time: 0.3970 data_time: 0.0118 lr: 0.00055944 max_mem: 3196M [05/09 01:05:12 d2.utils.events]: eta: 0:04:17 iter: 299 total_loss: 0.3761 loss_cls: 0.2507 loss_box_reg: 0.09403 time: 0.3945 data_time: 0.0122 lr: 0.0005994 max_mem: 3196M [05/09 01:05:19 d2.utils.events]: eta: 0:04:09 iter: 319 total_loss: 0.3592 loss_cls: 0.2351 loss_box_reg: 0.09504 time: 0.3922 data_time: 0.0115 lr: 0.00063936 max_mem: 3196M [05/09 01:05:26 d2.utils.events]: eta: 0:04:01 iter: 339 total_loss: 0.4348 loss_cls: 0.2863 loss_box_reg: 0.1556 time: 0.3908 data_time: 0.0143 lr: 0.00067932 max_mem: 3196M [05/09 01:05:34 d2.utils.events]: eta: 0:03:54 iter: 359 total_loss: 0.3023 loss_cls: 0.2222 loss_box_reg: 0.07102 time: 0.3898 data_time: 0.0156 lr: 0.00071928 max_mem: 3196M [05/09 01:05:41 d2.utils.events]: eta: 0:03:46 iter: 379 total_loss: 0.434 loss_cls: 0.285 loss_box_reg: 0.1336 time: 0.3881 data_time: 0.0127 lr: 0.00075924 max_mem: 3196M [05/09 01:05:48 d2.utils.events]: eta: 0:03:39 iter: 399 total_loss: 0.4044 loss_cls: 0.3018 loss_box_reg: 0.1027 time: 0.3867 data_time: 0.0126 lr: 0.0007992 max_mem: 3196M [05/09 01:05:56 d2.utils.events]: eta: 0:03:31 iter: 419 total_loss: 0.4142 loss_cls: 0.2887 loss_box_reg: 0.1244 time: 0.3858 data_time: 0.0161 lr: 0.00083916 max_mem: 3196M [05/09 01:06:03 d2.utils.events]: eta: 0:03:24 iter: 439 total_loss: 0.4155 loss_cls: 0.2626 loss_box_reg: 0.1158 time: 0.3845 data_time: 0.0118 lr: 0.00087912 max_mem: 3196M [05/09 01:06:10 d2.utils.events]: eta: 0:03:16 iter: 459 total_loss: 
0.346 loss_cls: 0.2178 loss_box_reg: 0.1042 time: 0.3837 data_time: 0.0130 lr: 0.00091908 max_mem: 3196M [05/09 01:06:18 d2.utils.events]: eta: 0:03:09 iter: 479 total_loss: 0.4098 loss_cls: 0.2707 loss_box_reg: 0.1092 time: 0.3833 data_time: 0.0130 lr: 0.00095904 max_mem: 3196M [05/09 01:06:25 d2.utils.events]: eta: 0:03:02 iter: 499 total_loss: 0.4552 loss_cls: 0.2975 loss_box_reg: 0.1461 time: 0.3831 data_time: 0.0134 lr: 0.000999 max_mem: 3196M [05/09 01:06:33 d2.utils.events]: eta: 0:02:55 iter: 519 total_loss: 0.4332 loss_cls: 0.279 loss_box_reg: 0.1033 time: 0.3825 data_time: 0.0125 lr: 0.001039 max_mem: 3196M [05/09 01:06:40 d2.utils.events]: eta: 0:02:47 iter: 539 total_loss: 0.3853 loss_cls: 0.2663 loss_box_reg: 0.1248 time: 0.3823 data_time: 0.0123 lr: 0.0010789 max_mem: 3196M [05/09 01:06:48 d2.utils.events]: eta: 0:02:40 iter: 559 total_loss: 0.3877 loss_cls: 0.2317 loss_box_reg: 0.1276 time: 0.3819 data_time: 0.0112 lr: 0.0011189 max_mem: 3196M [05/09 01:06:55 d2.utils.events]: eta: 0:02:33 iter: 579 total_loss: 0.3368 loss_cls: 0.2089 loss_box_reg: 0.1093 time: 0.3811 data_time: 0.0120 lr: 0.0011588 max_mem: 3196M [05/09 01:07:02 d2.utils.events]: eta: 0:02:25 iter: 599 total_loss: 0.3815 loss_cls: 0.2697 loss_box_reg: 0.1116 time: 0.3806 data_time: 0.0116 lr: 0.0011988 max_mem: 3196M [05/09 01:07:09 d2.utils.events]: eta: 0:02:18 iter: 619 total_loss: 0.3684 loss_cls: 0.2387 loss_box_reg: 0.09971 time: 0.3800 data_time: 0.0132 lr: 0.0012388 max_mem: 3196M [05/09 01:07:17 d2.utils.events]: eta: 0:02:10 iter: 639 total_loss: 0.4873 loss_cls: 0.3165 loss_box_reg: 0.1119 time: 0.3795 data_time: 0.0110 lr: 0.0012787 max_mem: 3196M [05/09 01:07:24 d2.utils.events]: eta: 0:02:03 iter: 659 total_loss: 0.3893 loss_cls: 0.2669 loss_box_reg: 0.1129 time: 0.3789 data_time: 0.0141 lr: 0.0013187 max_mem: 3196M [05/09 01:07:31 d2.utils.events]: eta: 0:01:56 iter: 679 total_loss: 0.485 loss_cls: 0.326 loss_box_reg: 0.1377 time: 0.3784 data_time: 0.0132 lr: 
0.0013586 max_mem: 3196M [05/09 01:07:39 d2.utils.events]: eta: 0:01:49 iter: 699 total_loss: 0.4071 loss_cls: 0.2839 loss_box_reg: 0.14 time: 0.3782 data_time: 0.0130 lr: 0.0013986 max_mem: 3196M [05/09 01:07:46 d2.utils.events]: eta: 0:01:41 iter: 719 total_loss: 0.3932 loss_cls: 0.2767 loss_box_reg: 0.1146 time: 0.3776 data_time: 0.0135 lr: 0.0014386 max_mem: 3196M [05/09 01:07:53 d2.utils.events]: eta: 0:01:34 iter: 739 total_loss: 0.3812 loss_cls: 0.2876 loss_box_reg: 0.1232 time: 0.3775 data_time: 0.0152 lr: 0.0014785 max_mem: 3196M [05/09 01:08:01 d2.utils.events]: eta: 0:01:27 iter: 759 total_loss: 0.4658 loss_cls: 0.2737 loss_box_reg: 0.1709 time: 0.3774 data_time: 0.0113 lr: 0.0015185 max_mem: 3196M [05/09 01:08:08 d2.utils.events]: eta: 0:01:20 iter: 779 total_loss: 0.4578 loss_cls: 0.2988 loss_box_reg: 0.1496 time: 0.3772 data_time: 0.0114 lr: 0.0015584 max_mem: 3196M [05/09 01:08:16 d2.utils.events]: eta: 0:01:12 iter: 799 total_loss: 0.5417 loss_cls: 0.3343 loss_box_reg: 0.1399 time: 0.3771 data_time: 0.0133 lr: 0.0015984 max_mem: 3196M [05/09 01:08:23 d2.utils.events]: eta: 0:01:05 iter: 819 total_loss: 0.4969 loss_cls: 0.3531 loss_box_reg: 0.128 time: 0.3770 data_time: 0.0114 lr: 0.0016384 max_mem: 3196M [05/09 01:08:30 d2.utils.events]: eta: 0:00:58 iter: 839 total_loss: 0.4378 loss_cls: 0.289 loss_box_reg: 0.1266 time: 0.3767 data_time: 0.0113 lr: 0.0016783 max_mem: 3196M [05/09 01:08:38 d2.utils.events]: eta: 0:00:50 iter: 859 total_loss: 0.7647 loss_cls: 0.5698 loss_box_reg: 0.1361 time: 0.3765 data_time: 0.0127 lr: 0.0017183 max_mem: 3196M [05/09 01:08:45 d2.utils.events]: eta: 0:00:43 iter: 879 total_loss: 0.5889 loss_cls: 0.3976 loss_box_reg: 0.1799 time: 0.3765 data_time: 0.0145 lr: 0.0017582 max_mem: 3196M [05/09 01:08:52 d2.utils.events]: eta: 0:00:36 iter: 899 total_loss: 0.6933 loss_cls: 0.5052 loss_box_reg: 0.1531 time: 0.3760 data_time: 0.0105 lr: 0.0017982 max_mem: 3196M [05/09 01:09:00 d2.utils.events]: eta: 0:00:29 iter: 919 
total_loss: 0.4321 loss_cls: 0.2709 loss_box_reg: 0.1463 time: 0.3758 data_time: 0.0117 lr: 0.0018382 max_mem: 3196M [05/09 01:09:07 d2.utils.events]: eta: 0:00:21 iter: 939 total_loss: 0.4459 loss_cls: 0.2775 loss_box_reg: 0.1288 time: 0.3757 data_time: 0.0113 lr: 0.0018781 max_mem: 3196M [05/09 01:09:15 d2.utils.events]: eta: 0:00:14 iter: 959 total_loss: 0.4141 loss_cls: 0.2734 loss_box_reg: 0.1423 time: 0.3757 data_time: 0.0124 lr: 0.0019181 max_mem: 3196M [05/09 01:09:22 d2.utils.events]: eta: 0:00:07 iter: 979 total_loss: 0.4112 loss_cls: 0.2543 loss_box_reg: 0.1521 time: 0.3754 data_time: 0.0140 lr: 0.001958 max_mem: 3196M [05/09 01:09:31 d2.utils.events]: eta: 0:00:00 iter: 999 total_loss: 0.4413 loss_cls: 0.2875 loss_box_reg: 0.1359 time: 0.3755 data_time: 0.0117 lr: 0.001998 max_mem: 3196M [05/09 01:09:31 d2.engine.hooks]: Overall training speed: 998 iterations in 0:06:14 (0.3755 s / it) [05/09 01:09:31 d2.engine.hooks]: Total training time: 0:06:17 (0:00:02 on hooks)
# Point the config at the weights produced by the training run above and
# configure test-time inference for the RetinaNet model.
cfg.MODEL.WEIGHTS = os.path.join(cfg.OUTPUT_DIR, "model_final.pth")
# cfg.MODEL.ROI_HEADS.SCORE_THRESH_TEST = 0.5 # set the testing threshold for this model
# RetinaNet uses its own score-threshold key (the ROI_HEADS line above is the
# Faster R-CNN equivalent, kept commented for reference).
cfg.MODEL.RETINANET.SCORE_THRESH_TEST = 0.5
# Evaluate on the registered V-COCO validation split.
cfg.DATASETS.TEST = ("rel_actions_val3", )
# Build a single-image inference wrapper; loads cfg.MODEL.WEIGHTS on construction.
predictor = DefaultPredictor(cfg)
[05/09 01:26:08 d2.checkpoint.c2_model_loading]: Following weights matched with model:
| Names in Model | Names in Checkpoint | Shapes |
|:-------------------------------------|:-----------------------------------------------------------------------------------------------------|:------------------------------------------------|
| backbone.bottom_up.res2.0.conv1.* | backbone.bottom_up.res2.0.conv1.{norm.bias,norm.running_mean,norm.running_var,norm.weight,weight} | (64,) (64,) (64,) (64,) (64,64,1,1) |
| backbone.bottom_up.res2.0.conv2.* | backbone.bottom_up.res2.0.conv2.{norm.bias,norm.running_mean,norm.running_var,norm.weight,weight} | (64,) (64,) (64,) (64,) (64,64,3,3) |
| backbone.bottom_up.res2.0.conv3.* | backbone.bottom_up.res2.0.conv3.{norm.bias,norm.running_mean,norm.running_var,norm.weight,weight} | (256,) (256,) (256,) (256,) (256,64,1,1) |
| backbone.bottom_up.res2.0.shortcut.* | backbone.bottom_up.res2.0.shortcut.{norm.bias,norm.running_mean,norm.running_var,norm.weight,weight} | (256,) (256,) (256,) (256,) (256,64,1,1) |
| backbone.bottom_up.res2.1.conv1.* | backbone.bottom_up.res2.1.conv1.{norm.bias,norm.running_mean,norm.running_var,norm.weight,weight} | (64,) (64,) (64,) (64,) (64,256,1,1) |
| backbone.bottom_up.res2.1.conv2.* | backbone.bottom_up.res2.1.conv2.{norm.bias,norm.running_mean,norm.running_var,norm.weight,weight} | (64,) (64,) (64,) (64,) (64,64,3,3) |
| backbone.bottom_up.res2.1.conv3.* | backbone.bottom_up.res2.1.conv3.{norm.bias,norm.running_mean,norm.running_var,norm.weight,weight} | (256,) (256,) (256,) (256,) (256,64,1,1) |
| backbone.bottom_up.res2.2.conv1.* | backbone.bottom_up.res2.2.conv1.{norm.bias,norm.running_mean,norm.running_var,norm.weight,weight} | (64,) (64,) (64,) (64,) (64,256,1,1) |
| backbone.bottom_up.res2.2.conv2.* | backbone.bottom_up.res2.2.conv2.{norm.bias,norm.running_mean,norm.running_var,norm.weight,weight} | (64,) (64,) (64,) (64,) (64,64,3,3) |
| backbone.bottom_up.res2.2.conv3.* | backbone.bottom_up.res2.2.conv3.{norm.bias,norm.running_mean,norm.running_var,norm.weight,weight} | (256,) (256,) (256,) (256,) (256,64,1,1) |
| backbone.bottom_up.res3.0.conv1.* | backbone.bottom_up.res3.0.conv1.{norm.bias,norm.running_mean,norm.running_var,norm.weight,weight} | (128,) (128,) (128,) (128,) (128,256,1,1) |
| backbone.bottom_up.res3.0.conv2.* | backbone.bottom_up.res3.0.conv2.{norm.bias,norm.running_mean,norm.running_var,norm.weight,weight} | (128,) (128,) (128,) (128,) (128,128,3,3) |
| backbone.bottom_up.res3.0.conv3.* | backbone.bottom_up.res3.0.conv3.{norm.bias,norm.running_mean,norm.running_var,norm.weight,weight} | (512,) (512,) (512,) (512,) (512,128,1,1) |
| backbone.bottom_up.res3.0.shortcut.* | backbone.bottom_up.res3.0.shortcut.{norm.bias,norm.running_mean,norm.running_var,norm.weight,weight} | (512,) (512,) (512,) (512,) (512,256,1,1) |
| backbone.bottom_up.res3.1.conv1.* | backbone.bottom_up.res3.1.conv1.{norm.bias,norm.running_mean,norm.running_var,norm.weight,weight} | (128,) (128,) (128,) (128,) (128,512,1,1) |
| backbone.bottom_up.res3.1.conv2.* | backbone.bottom_up.res3.1.conv2.{norm.bias,norm.running_mean,norm.running_var,norm.weight,weight} | (128,) (128,) (128,) (128,) (128,128,3,3) |
| backbone.bottom_up.res3.1.conv3.* | backbone.bottom_up.res3.1.conv3.{norm.bias,norm.running_mean,norm.running_var,norm.weight,weight} | (512,) (512,) (512,) (512,) (512,128,1,1) |
| backbone.bottom_up.res3.2.conv1.* | backbone.bottom_up.res3.2.conv1.{norm.bias,norm.running_mean,norm.running_var,norm.weight,weight} | (128,) (128,) (128,) (128,) (128,512,1,1) |
| backbone.bottom_up.res3.2.conv2.* | backbone.bottom_up.res3.2.conv2.{norm.bias,norm.running_mean,norm.running_var,norm.weight,weight} | (128,) (128,) (128,) (128,) (128,128,3,3) |
| backbone.bottom_up.res3.2.conv3.* | backbone.bottom_up.res3.2.conv3.{norm.bias,norm.running_mean,norm.running_var,norm.weight,weight} | (512,) (512,) (512,) (512,) (512,128,1,1) |
| backbone.bottom_up.res3.3.conv1.* | backbone.bottom_up.res3.3.conv1.{norm.bias,norm.running_mean,norm.running_var,norm.weight,weight} | (128,) (128,) (128,) (128,) (128,512,1,1) |
| backbone.bottom_up.res3.3.conv2.* | backbone.bottom_up.res3.3.conv2.{norm.bias,norm.running_mean,norm.running_var,norm.weight,weight} | (128,) (128,) (128,) (128,) (128,128,3,3) |
| backbone.bottom_up.res3.3.conv3.* | backbone.bottom_up.res3.3.conv3.{norm.bias,norm.running_mean,norm.running_var,norm.weight,weight} | (512,) (512,) (512,) (512,) (512,128,1,1) |
| backbone.bottom_up.res4.0.conv1.* | backbone.bottom_up.res4.0.conv1.{norm.bias,norm.running_mean,norm.running_var,norm.weight,weight} | (256,) (256,) (256,) (256,) (256,512,1,1) |
| backbone.bottom_up.res4.0.conv2.* | backbone.bottom_up.res4.0.conv2.{norm.bias,norm.running_mean,norm.running_var,norm.weight,weight} | (256,) (256,) (256,) (256,) (256,256,3,3) |
| backbone.bottom_up.res4.0.conv3.* | backbone.bottom_up.res4.0.conv3.{norm.bias,norm.running_mean,norm.running_var,norm.weight,weight} | (1024,) (1024,) (1024,) (1024,) (1024,256,1,1) |
| backbone.bottom_up.res4.0.shortcut.* | backbone.bottom_up.res4.0.shortcut.{norm.bias,norm.running_mean,norm.running_var,norm.weight,weight} | (1024,) (1024,) (1024,) (1024,) (1024,512,1,1) |
| backbone.bottom_up.res4.1.conv1.* | backbone.bottom_up.res4.1.conv1.{norm.bias,norm.running_mean,norm.running_var,norm.weight,weight} | (256,) (256,) (256,) (256,) (256,1024,1,1) |
| backbone.bottom_up.res4.1.conv2.* | backbone.bottom_up.res4.1.conv2.{norm.bias,norm.running_mean,norm.running_var,norm.weight,weight} | (256,) (256,) (256,) (256,) (256,256,3,3) |
| backbone.bottom_up.res4.1.conv3.* | backbone.bottom_up.res4.1.conv3.{norm.bias,norm.running_mean,norm.running_var,norm.weight,weight} | (1024,) (1024,) (1024,) (1024,) (1024,256,1,1) |
| backbone.bottom_up.res4.10.conv1.* | backbone.bottom_up.res4.10.conv1.{norm.bias,norm.running_mean,norm.running_var,norm.weight,weight} | (256,) (256,) (256,) (256,) (256,1024,1,1) |
| backbone.bottom_up.res4.10.conv2.* | backbone.bottom_up.res4.10.conv2.{norm.bias,norm.running_mean,norm.running_var,norm.weight,weight} | (256,) (256,) (256,) (256,) (256,256,3,3) |
| backbone.bottom_up.res4.10.conv3.* | backbone.bottom_up.res4.10.conv3.{norm.bias,norm.running_mean,norm.running_var,norm.weight,weight} | (1024,) (1024,) (1024,) (1024,) (1024,256,1,1) |
| backbone.bottom_up.res4.11.conv1.* | backbone.bottom_up.res4.11.conv1.{norm.bias,norm.running_mean,norm.running_var,norm.weight,weight} | (256,) (256,) (256,) (256,) (256,1024,1,1) |
| backbone.bottom_up.res4.11.conv2.* | backbone.bottom_up.res4.11.conv2.{norm.bias,norm.running_mean,norm.running_var,norm.weight,weight} | (256,) (256,) (256,) (256,) (256,256,3,3) |
| backbone.bottom_up.res4.11.conv3.* | backbone.bottom_up.res4.11.conv3.{norm.bias,norm.running_mean,norm.running_var,norm.weight,weight} | (1024,) (1024,) (1024,) (1024,) (1024,256,1,1) |
| backbone.bottom_up.res4.12.conv1.* | backbone.bottom_up.res4.12.conv1.{norm.bias,norm.running_mean,norm.running_var,norm.weight,weight} | (256,) (256,) (256,) (256,) (256,1024,1,1) |
| backbone.bottom_up.res4.12.conv2.* | backbone.bottom_up.res4.12.conv2.{norm.bias,norm.running_mean,norm.running_var,norm.weight,weight} | (256,) (256,) (256,) (256,) (256,256,3,3) |
| backbone.bottom_up.res4.12.conv3.* | backbone.bottom_up.res4.12.conv3.{norm.bias,norm.running_mean,norm.running_var,norm.weight,weight} | (1024,) (1024,) (1024,) (1024,) (1024,256,1,1) |
| backbone.bottom_up.res4.13.conv1.* | backbone.bottom_up.res4.13.conv1.{norm.bias,norm.running_mean,norm.running_var,norm.weight,weight} | (256,) (256,) (256,) (256,) (256,1024,1,1) |
| backbone.bottom_up.res4.13.conv2.* | backbone.bottom_up.res4.13.conv2.{norm.bias,norm.running_mean,norm.running_var,norm.weight,weight} | (256,) (256,) (256,) (256,) (256,256,3,3) |
| backbone.bottom_up.res4.13.conv3.* | backbone.bottom_up.res4.13.conv3.{norm.bias,norm.running_mean,norm.running_var,norm.weight,weight} | (1024,) (1024,) (1024,) (1024,) (1024,256,1,1) |
| backbone.bottom_up.res4.14.conv1.* | backbone.bottom_up.res4.14.conv1.{norm.bias,norm.running_mean,norm.running_var,norm.weight,weight} | (256,) (256,) (256,) (256,) (256,1024,1,1) |
| backbone.bottom_up.res4.14.conv2.* | backbone.bottom_up.res4.14.conv2.{norm.bias,norm.running_mean,norm.running_var,norm.weight,weight} | (256,) (256,) (256,) (256,) (256,256,3,3) |
| backbone.bottom_up.res4.14.conv3.* | backbone.bottom_up.res4.14.conv3.{norm.bias,norm.running_mean,norm.running_var,norm.weight,weight} | (1024,) (1024,) (1024,) (1024,) (1024,256,1,1) |
| backbone.bottom_up.res4.15.conv1.* | backbone.bottom_up.res4.15.conv1.{norm.bias,norm.running_mean,norm.running_var,norm.weight,weight} | (256,) (256,) (256,) (256,) (256,1024,1,1) |
| backbone.bottom_up.res4.15.conv2.* | backbone.bottom_up.res4.15.conv2.{norm.bias,norm.running_mean,norm.running_var,norm.weight,weight} | (256,) (256,) (256,) (256,) (256,256,3,3) |
| backbone.bottom_up.res4.15.conv3.* | backbone.bottom_up.res4.15.conv3.{norm.bias,norm.running_mean,norm.running_var,norm.weight,weight} | (1024,) (1024,) (1024,) (1024,) (1024,256,1,1) |
| backbone.bottom_up.res4.16.conv1.* | backbone.bottom_up.res4.16.conv1.{norm.bias,norm.running_mean,norm.running_var,norm.weight,weight} | (256,) (256,) (256,) (256,) (256,1024,1,1) |
| backbone.bottom_up.res4.16.conv2.* | backbone.bottom_up.res4.16.conv2.{norm.bias,norm.running_mean,norm.running_var,norm.weight,weight} | (256,) (256,) (256,) (256,) (256,256,3,3) |
| backbone.bottom_up.res4.16.conv3.* | backbone.bottom_up.res4.16.conv3.{norm.bias,norm.running_mean,norm.running_var,norm.weight,weight} | (1024,) (1024,) (1024,) (1024,) (1024,256,1,1) |
| backbone.bottom_up.res4.17.conv1.* | backbone.bottom_up.res4.17.conv1.{norm.bias,norm.running_mean,norm.running_var,norm.weight,weight} | (256,) (256,) (256,) (256,) (256,1024,1,1) |
| backbone.bottom_up.res4.17.conv2.* | backbone.bottom_up.res4.17.conv2.{norm.bias,norm.running_mean,norm.running_var,norm.weight,weight} | (256,) (256,) (256,) (256,) (256,256,3,3) |
| backbone.bottom_up.res4.17.conv3.* | backbone.bottom_up.res4.17.conv3.{norm.bias,norm.running_mean,norm.running_var,norm.weight,weight} | (1024,) (1024,) (1024,) (1024,) (1024,256,1,1) |
| backbone.bottom_up.res4.18.conv1.* | backbone.bottom_up.res4.18.conv1.{norm.bias,norm.running_mean,norm.running_var,norm.weight,weight} | (256,) (256,) (256,) (256,) (256,1024,1,1) |
| backbone.bottom_up.res4.18.conv2.* | backbone.bottom_up.res4.18.conv2.{norm.bias,norm.running_mean,norm.running_var,norm.weight,weight} | (256,) (256,) (256,) (256,) (256,256,3,3) |
| backbone.bottom_up.res4.18.conv3.* | backbone.bottom_up.res4.18.conv3.{norm.bias,norm.running_mean,norm.running_var,norm.weight,weight} | (1024,) (1024,) (1024,) (1024,) (1024,256,1,1) |
| backbone.bottom_up.res4.19.conv1.* | backbone.bottom_up.res4.19.conv1.{norm.bias,norm.running_mean,norm.running_var,norm.weight,weight} | (256,) (256,) (256,) (256,) (256,1024,1,1) |
| backbone.bottom_up.res4.19.conv2.* | backbone.bottom_up.res4.19.conv2.{norm.bias,norm.running_mean,norm.running_var,norm.weight,weight} | (256,) (256,) (256,) (256,) (256,256,3,3) |
| backbone.bottom_up.res4.19.conv3.* | backbone.bottom_up.res4.19.conv3.{norm.bias,norm.running_mean,norm.running_var,norm.weight,weight} | (1024,) (1024,) (1024,) (1024,) (1024,256,1,1) |
| backbone.bottom_up.res4.2.conv1.* | backbone.bottom_up.res4.2.conv1.{norm.bias,norm.running_mean,norm.running_var,norm.weight,weight} | (256,) (256,) (256,) (256,) (256,1024,1,1) |
| backbone.bottom_up.res4.2.conv2.* | backbone.bottom_up.res4.2.conv2.{norm.bias,norm.running_mean,norm.running_var,norm.weight,weight} | (256,) (256,) (256,) (256,) (256,256,3,3) |
| backbone.bottom_up.res4.2.conv3.* | backbone.bottom_up.res4.2.conv3.{norm.bias,norm.running_mean,norm.running_var,norm.weight,weight} | (1024,) (1024,) (1024,) (1024,) (1024,256,1,1) |
| backbone.bottom_up.res4.20.conv1.* | backbone.bottom_up.res4.20.conv1.{norm.bias,norm.running_mean,norm.running_var,norm.weight,weight} | (256,) (256,) (256,) (256,) (256,1024,1,1) |
| backbone.bottom_up.res4.20.conv2.* | backbone.bottom_up.res4.20.conv2.{norm.bias,norm.running_mean,norm.running_var,norm.weight,weight} | (256,) (256,) (256,) (256,) (256,256,3,3) |
| backbone.bottom_up.res4.20.conv3.* | backbone.bottom_up.res4.20.conv3.{norm.bias,norm.running_mean,norm.running_var,norm.weight,weight} | (1024,) (1024,) (1024,) (1024,) (1024,256,1,1) |
| backbone.bottom_up.res4.21.conv1.* | backbone.bottom_up.res4.21.conv1.{norm.bias,norm.running_mean,norm.running_var,norm.weight,weight} | (256,) (256,) (256,) (256,) (256,1024,1,1) |
| backbone.bottom_up.res4.21.conv2.* | backbone.bottom_up.res4.21.conv2.{norm.bias,norm.running_mean,norm.running_var,norm.weight,weight} | (256,) (256,) (256,) (256,) (256,256,3,3) |
| backbone.bottom_up.res4.21.conv3.* | backbone.bottom_up.res4.21.conv3.{norm.bias,norm.running_mean,norm.running_var,norm.weight,weight} | (1024,) (1024,) (1024,) (1024,) (1024,256,1,1) |
| backbone.bottom_up.res4.22.conv1.* | backbone.bottom_up.res4.22.conv1.{norm.bias,norm.running_mean,norm.running_var,norm.weight,weight} | (256,) (256,) (256,) (256,) (256,1024,1,1) |
| backbone.bottom_up.res4.22.conv2.* | backbone.bottom_up.res4.22.conv2.{norm.bias,norm.running_mean,norm.running_var,norm.weight,weight} | (256,) (256,) (256,) (256,) (256,256,3,3) |
| backbone.bottom_up.res4.22.conv3.* | backbone.bottom_up.res4.22.conv3.{norm.bias,norm.running_mean,norm.running_var,norm.weight,weight} | (1024,) (1024,) (1024,) (1024,) (1024,256,1,1) |
| backbone.bottom_up.res4.3.conv1.* | backbone.bottom_up.res4.3.conv1.{norm.bias,norm.running_mean,norm.running_var,norm.weight,weight} | (256,) (256,) (256,) (256,) (256,1024,1,1) |
| backbone.bottom_up.res4.3.conv2.* | backbone.bottom_up.res4.3.conv2.{norm.bias,norm.running_mean,norm.running_var,norm.weight,weight} | (256,) (256,) (256,) (256,) (256,256,3,3) |
| backbone.bottom_up.res4.3.conv3.* | backbone.bottom_up.res4.3.conv3.{norm.bias,norm.running_mean,norm.running_var,norm.weight,weight} | (1024,) (1024,) (1024,) (1024,) (1024,256,1,1) |
| backbone.bottom_up.res4.4.conv1.* | backbone.bottom_up.res4.4.conv1.{norm.bias,norm.running_mean,norm.running_var,norm.weight,weight} | (256,) (256,) (256,) (256,) (256,1024,1,1) |
| backbone.bottom_up.res4.4.conv2.* | backbone.bottom_up.res4.4.conv2.{norm.bias,norm.running_mean,norm.running_var,norm.weight,weight} | (256,) (256,) (256,) (256,) (256,256,3,3) |
| backbone.bottom_up.res4.4.conv3.* | backbone.bottom_up.res4.4.conv3.{norm.bias,norm.running_mean,norm.running_var,norm.weight,weight} | (1024,) (1024,) (1024,) (1024,) (1024,256,1,1) |
| backbone.bottom_up.res4.5.conv1.* | backbone.bottom_up.res4.5.conv1.{norm.bias,norm.running_mean,norm.running_var,norm.weight,weight} | (256,) (256,) (256,) (256,) (256,1024,1,1) |
| backbone.bottom_up.res4.5.conv2.* | backbone.bottom_up.res4.5.conv2.{norm.bias,norm.running_mean,norm.running_var,norm.weight,weight} | (256,) (256,) (256,) (256,) (256,256,3,3) |
| backbone.bottom_up.res4.5.conv3.* | backbone.bottom_up.res4.5.conv3.{norm.bias,norm.running_mean,norm.running_var,norm.weight,weight} | (1024,) (1024,) (1024,) (1024,) (1024,256,1,1) |
| backbone.bottom_up.res4.6.conv1.* | backbone.bottom_up.res4.6.conv1.{norm.bias,norm.running_mean,norm.running_var,norm.weight,weight} | (256,) (256,) (256,) (256,) (256,1024,1,1) |
| backbone.bottom_up.res4.6.conv2.* | backbone.bottom_up.res4.6.conv2.{norm.bias,norm.running_mean,norm.running_var,norm.weight,weight} | (256,) (256,) (256,) (256,) (256,256,3,3) |
| backbone.bottom_up.res4.6.conv3.* | backbone.bottom_up.res4.6.conv3.{norm.bias,norm.running_mean,norm.running_var,norm.weight,weight} | (1024,) (1024,) (1024,) (1024,) (1024,256,1,1) |
| backbone.bottom_up.res4.7.conv1.* | backbone.bottom_up.res4.7.conv1.{norm.bias,norm.running_mean,norm.running_var,norm.weight,weight} | (256,) (256,) (256,) (256,) (256,1024,1,1) |
| backbone.bottom_up.res4.7.conv2.* | backbone.bottom_up.res4.7.conv2.{norm.bias,norm.running_mean,norm.running_var,norm.weight,weight} | (256,) (256,) (256,) (256,) (256,256,3,3) |
| backbone.bottom_up.res4.7.conv3.* | backbone.bottom_up.res4.7.conv3.{norm.bias,norm.running_mean,norm.running_var,norm.weight,weight} | (1024,) (1024,) (1024,) (1024,) (1024,256,1,1) |
| backbone.bottom_up.res4.8.conv1.* | backbone.bottom_up.res4.8.conv1.{norm.bias,norm.running_mean,norm.running_var,norm.weight,weight} | (256,) (256,) (256,) (256,) (256,1024,1,1) |
| backbone.bottom_up.res4.8.conv2.* | backbone.bottom_up.res4.8.conv2.{norm.bias,norm.running_mean,norm.running_var,norm.weight,weight} | (256,) (256,) (256,) (256,) (256,256,3,3) |
| backbone.bottom_up.res4.8.conv3.* | backbone.bottom_up.res4.8.conv3.{norm.bias,norm.running_mean,norm.running_var,norm.weight,weight} | (1024,) (1024,) (1024,) (1024,) (1024,256,1,1) |
| backbone.bottom_up.res4.9.conv1.* | backbone.bottom_up.res4.9.conv1.{norm.bias,norm.running_mean,norm.running_var,norm.weight,weight} | (256,) (256,) (256,) (256,) (256,1024,1,1) |
| backbone.bottom_up.res4.9.conv2.* | backbone.bottom_up.res4.9.conv2.{norm.bias,norm.running_mean,norm.running_var,norm.weight,weight} | (256,) (256,) (256,) (256,) (256,256,3,3) |
| backbone.bottom_up.res4.9.conv3.* | backbone.bottom_up.res4.9.conv3.{norm.bias,norm.running_mean,norm.running_var,norm.weight,weight} | (1024,) (1024,) (1024,) (1024,) (1024,256,1,1) |
| backbone.bottom_up.res5.0.conv1.* | backbone.bottom_up.res5.0.conv1.{norm.bias,norm.running_mean,norm.running_var,norm.weight,weight} | (512,) (512,) (512,) (512,) (512,1024,1,1) |
| backbone.bottom_up.res5.0.conv2.* | backbone.bottom_up.res5.0.conv2.{norm.bias,norm.running_mean,norm.running_var,norm.weight,weight} | (512,) (512,) (512,) (512,) (512,512,3,3) |
| backbone.bottom_up.res5.0.conv3.* | backbone.bottom_up.res5.0.conv3.{norm.bias,norm.running_mean,norm.running_var,norm.weight,weight} | (2048,) (2048,) (2048,) (2048,) (2048,512,1,1) |
| backbone.bottom_up.res5.0.shortcut.* | backbone.bottom_up.res5.0.shortcut.{norm.bias,norm.running_mean,norm.running_var,norm.weight,weight} | (2048,) (2048,) (2048,) (2048,) (2048,1024,1,1) |
| backbone.bottom_up.res5.1.conv1.* | backbone.bottom_up.res5.1.conv1.{norm.bias,norm.running_mean,norm.running_var,norm.weight,weight} | (512,) (512,) (512,) (512,) (512,2048,1,1) |
| backbone.bottom_up.res5.1.conv2.* | backbone.bottom_up.res5.1.conv2.{norm.bias,norm.running_mean,norm.running_var,norm.weight,weight} | (512,) (512,) (512,) (512,) (512,512,3,3) |
| backbone.bottom_up.res5.1.conv3.* | backbone.bottom_up.res5.1.conv3.{norm.bias,norm.running_mean,norm.running_var,norm.weight,weight} | (2048,) (2048,) (2048,) (2048,) (2048,512,1,1) |
| backbone.bottom_up.res5.2.conv1.* | backbone.bottom_up.res5.2.conv1.{norm.bias,norm.running_mean,norm.running_var,norm.weight,weight} | (512,) (512,) (512,) (512,) (512,2048,1,1) |
| backbone.bottom_up.res5.2.conv2.* | backbone.bottom_up.res5.2.conv2.{norm.bias,norm.running_mean,norm.running_var,norm.weight,weight} | (512,) (512,) (512,) (512,) (512,512,3,3) |
| backbone.bottom_up.res5.2.conv3.* | backbone.bottom_up.res5.2.conv3.{norm.bias,norm.running_mean,norm.running_var,norm.weight,weight} | (2048,) (2048,) (2048,) (2048,) (2048,512,1,1) |
| backbone.bottom_up.stem.conv1.* | backbone.bottom_up.stem.conv1.{norm.bias,norm.running_mean,norm.running_var,norm.weight,weight} | (64,) (64,) (64,) (64,) (64,3,7,7) |
| backbone.fpn_lateral3.* | backbone.fpn_lateral3.{bias,weight} | (256,) (256,512,1,1) |
| backbone.fpn_lateral4.* | backbone.fpn_lateral4.{bias,weight} | (256,) (256,1024,1,1) |
| backbone.fpn_lateral5.* | backbone.fpn_lateral5.{bias,weight} | (256,) (256,2048,1,1) |
| backbone.fpn_output3.* | backbone.fpn_output3.{bias,weight} | (256,) (256,256,3,3) |
| backbone.fpn_output4.* | backbone.fpn_output4.{bias,weight} | (256,) (256,256,3,3) |
| backbone.fpn_output5.* | backbone.fpn_output5.{bias,weight} | (256,) (256,256,3,3) |
| backbone.top_block.p6.* | backbone.top_block.p6.{bias,weight} | (256,) (256,2048,3,3) |
| backbone.top_block.p7.* | backbone.top_block.p7.{bias,weight} | (256,) (256,256,3,3) |
| head.bbox_pred.* | head.bbox_pred.{bias,weight} | (36,) (36,256,3,3) |
| head.bbox_subnet.0.* | head.bbox_subnet.0.{bias,weight} | (256,) (256,256,3,3) |
| head.bbox_subnet.2.* | head.bbox_subnet.2.{bias,weight} | (256,) (256,256,3,3) |
| head.bbox_subnet.4.* | head.bbox_subnet.4.{bias,weight} | (256,) (256,256,3,3) |
| head.bbox_subnet.6.* | head.bbox_subnet.6.{bias,weight} | (256,) (256,256,3,3) |
| head.cls_score.* | head.cls_score.{bias,weight} | (63,) (63,256,3,3) |
| head.cls_subnet.0.* | head.cls_subnet.0.{bias,weight} | (256,) (256,256,3,3) |
| head.cls_subnet.2.* | head.cls_subnet.2.{bias,weight} | (256,) (256,256,3,3) |
| head.cls_subnet.4.* | head.cls_subnet.4.{bias,weight} | (256,) (256,256,3,3) |
| head.cls_subnet.6.* | head.cls_subnet.6.{bias,weight} | (256,) (256,256,3,3) |
from detectron2.evaluation import COCOEvaluator, inference_on_dataset
from detectron2.data import build_detection_test_loader
from detectron2.engine import DefaultTrainer

# Run COCO-style AP evaluation on the validation split.
# NOTE: the config-based COCOEvaluator constructor is deprecated (the run's own
# log printed "COCO Evaluator instantiated using config, this is deprecated
# behavior"); pass explicit arguments instead. Passing output_dir explicitly
# replaces the old positional (cfg, distributed, output_dir) form.
evaluator = COCOEvaluator("rel_actions_val3", output_dir="./output_evals/")
val_loader = build_detection_test_loader(cfg, "rel_actions_val3")
# NOTE(review): this evaluates trainer.model (from the training cell) rather
# than the freshly built `predictor.model` — both hold the same final weights
# here, but confirm that is intentional before reusing this cell standalone.
inference_on_dataset(trainer.model, val_loader, evaluator)
WARNING [05/09 01:10:24 d2.evaluation.coco_evaluation]: COCO Evaluator instantiated using config, this is deprecated behavior. Please pass in explicit arguments instead. [05/09 01:10:24 d2.data.datasets.coco]: Loaded 2215 images in COCO format from ./data/instances_vcoco_actions_val_2014.json [05/09 01:10:24 d2.data.dataset_mapper]: [DatasetMapper] Augmentations used in inference: [ResizeShortestEdge(short_edge_length=(800, 800), max_size=1333, sample_style='choice')] [05/09 01:10:24 d2.data.common]: Serializing 2215 elements to byte tensors and concatenating them all ... [05/09 01:10:25 d2.data.common]: Serialized dataset takes 5.66 MiB [05/09 01:10:25 d2.evaluation.evaluator]: Start inference on 2215 batches [05/09 01:10:26 d2.evaluation.evaluator]: Inference done 11/2215. Dataloading: 0.0016 s/iter. Inference: 0.0780 s/iter. Eval: 0.0004 s/iter. Total: 0.0800 s/iter. ETA=0:02:56 [05/09 01:10:31 d2.evaluation.evaluator]: Inference done 69/2215. Dataloading: 0.0025 s/iter. Inference: 0.0842 s/iter. Eval: 0.0004 s/iter. Total: 0.0872 s/iter. ETA=0:03:07 [05/09 01:10:36 d2.evaluation.evaluator]: Inference done 127/2215. Dataloading: 0.0024 s/iter. Inference: 0.0839 s/iter. Eval: 0.0004 s/iter. Total: 0.0868 s/iter. ETA=0:03:01 [05/09 01:10:41 d2.evaluation.evaluator]: Inference done 186/2215. Dataloading: 0.0023 s/iter. Inference: 0.0837 s/iter. Eval: 0.0004 s/iter. Total: 0.0866 s/iter. ETA=0:02:55 [05/09 01:10:46 d2.evaluation.evaluator]: Inference done 244/2215. Dataloading: 0.0024 s/iter. Inference: 0.0838 s/iter. Eval: 0.0004 s/iter. Total: 0.0867 s/iter. ETA=0:02:50 [05/09 01:10:51 d2.evaluation.evaluator]: Inference done 301/2215. Dataloading: 0.0024 s/iter. Inference: 0.0841 s/iter. Eval: 0.0004 s/iter. Total: 0.0870 s/iter. ETA=0:02:46 [05/09 01:10:56 d2.evaluation.evaluator]: Inference done 359/2215. Dataloading: 0.0024 s/iter. Inference: 0.0841 s/iter. Eval: 0.0004 s/iter. Total: 0.0870 s/iter. 
ETA=0:02:41 [05/09 01:11:01 d2.evaluation.evaluator]: Inference done 417/2215. Dataloading: 0.0024 s/iter. Inference: 0.0840 s/iter. Eval: 0.0004 s/iter. Total: 0.0869 s/iter. ETA=0:02:36 [05/09 01:11:06 d2.evaluation.evaluator]: Inference done 474/2215. Dataloading: 0.0024 s/iter. Inference: 0.0842 s/iter. Eval: 0.0004 s/iter. Total: 0.0871 s/iter. ETA=0:02:31 [05/09 01:11:11 d2.evaluation.evaluator]: Inference done 532/2215. Dataloading: 0.0024 s/iter. Inference: 0.0842 s/iter. Eval: 0.0004 s/iter. Total: 0.0871 s/iter. ETA=0:02:26 [05/09 01:11:16 d2.evaluation.evaluator]: Inference done 591/2215. Dataloading: 0.0024 s/iter. Inference: 0.0840 s/iter. Eval: 0.0004 s/iter. Total: 0.0869 s/iter. ETA=0:02:21 [05/09 01:11:21 d2.evaluation.evaluator]: Inference done 650/2215. Dataloading: 0.0024 s/iter. Inference: 0.0839 s/iter. Eval: 0.0004 s/iter. Total: 0.0868 s/iter. ETA=0:02:15 [05/09 01:11:26 d2.evaluation.evaluator]: Inference done 707/2215. Dataloading: 0.0024 s/iter. Inference: 0.0841 s/iter. Eval: 0.0004 s/iter. Total: 0.0870 s/iter. ETA=0:02:11 [05/09 01:11:31 d2.evaluation.evaluator]: Inference done 765/2215. Dataloading: 0.0024 s/iter. Inference: 0.0842 s/iter. Eval: 0.0004 s/iter. Total: 0.0870 s/iter. ETA=0:02:06 [05/09 01:11:36 d2.evaluation.evaluator]: Inference done 823/2215. Dataloading: 0.0024 s/iter. Inference: 0.0841 s/iter. Eval: 0.0004 s/iter. Total: 0.0870 s/iter. ETA=0:02:01 [05/09 01:11:41 d2.evaluation.evaluator]: Inference done 881/2215. Dataloading: 0.0024 s/iter. Inference: 0.0841 s/iter. Eval: 0.0004 s/iter. Total: 0.0870 s/iter. ETA=0:01:56 [05/09 01:11:47 d2.evaluation.evaluator]: Inference done 939/2215. Dataloading: 0.0024 s/iter. Inference: 0.0841 s/iter. Eval: 0.0004 s/iter. Total: 0.0870 s/iter. ETA=0:01:50 [05/09 01:11:52 d2.evaluation.evaluator]: Inference done 996/2215. Dataloading: 0.0024 s/iter. Inference: 0.0841 s/iter. Eval: 0.0004 s/iter. Total: 0.0870 s/iter. 
ETA=0:01:46 [05/09 01:11:57 d2.evaluation.evaluator]: Inference done 1055/2215. Dataloading: 0.0024 s/iter. Inference: 0.0841 s/iter. Eval: 0.0004 s/iter. Total: 0.0870 s/iter. ETA=0:01:40 [05/09 01:12:02 d2.evaluation.evaluator]: Inference done 1112/2215. Dataloading: 0.0024 s/iter. Inference: 0.0842 s/iter. Eval: 0.0004 s/iter. Total: 0.0871 s/iter. ETA=0:01:36 [05/09 01:12:07 d2.evaluation.evaluator]: Inference done 1170/2215. Dataloading: 0.0024 s/iter. Inference: 0.0841 s/iter. Eval: 0.0004 s/iter. Total: 0.0870 s/iter. ETA=0:01:30 [05/09 01:12:12 d2.evaluation.evaluator]: Inference done 1228/2215. Dataloading: 0.0024 s/iter. Inference: 0.0842 s/iter. Eval: 0.0004 s/iter. Total: 0.0870 s/iter. ETA=0:01:25 [05/09 01:12:17 d2.evaluation.evaluator]: Inference done 1281/2215. Dataloading: 0.0024 s/iter. Inference: 0.0847 s/iter. Eval: 0.0004 s/iter. Total: 0.0876 s/iter. ETA=0:01:21 [05/09 01:12:22 d2.evaluation.evaluator]: Inference done 1339/2215. Dataloading: 0.0024 s/iter. Inference: 0.0847 s/iter. Eval: 0.0004 s/iter. Total: 0.0876 s/iter. ETA=0:01:16 [05/09 01:12:27 d2.evaluation.evaluator]: Inference done 1397/2215. Dataloading: 0.0024 s/iter. Inference: 0.0847 s/iter. Eval: 0.0004 s/iter. Total: 0.0875 s/iter. ETA=0:01:11 [05/09 01:12:32 d2.evaluation.evaluator]: Inference done 1455/2215. Dataloading: 0.0024 s/iter. Inference: 0.0846 s/iter. Eval: 0.0004 s/iter. Total: 0.0875 s/iter. ETA=0:01:06 [05/09 01:12:37 d2.evaluation.evaluator]: Inference done 1512/2215. Dataloading: 0.0024 s/iter. Inference: 0.0847 s/iter. Eval: 0.0004 s/iter. Total: 0.0875 s/iter. ETA=0:01:01 [05/09 01:12:42 d2.evaluation.evaluator]: Inference done 1569/2215. Dataloading: 0.0024 s/iter. Inference: 0.0847 s/iter. Eval: 0.0004 s/iter. Total: 0.0875 s/iter. ETA=0:00:56 [05/09 01:12:47 d2.evaluation.evaluator]: Inference done 1626/2215. Dataloading: 0.0024 s/iter. Inference: 0.0847 s/iter. Eval: 0.0004 s/iter. Total: 0.0876 s/iter. 
ETA=0:00:51 [05/09 01:12:52 d2.evaluation.evaluator]: Inference done 1684/2215. Dataloading: 0.0024 s/iter. Inference: 0.0847 s/iter. Eval: 0.0004 s/iter. Total: 0.0876 s/iter. ETA=0:00:46 [05/09 01:12:57 d2.evaluation.evaluator]: Inference done 1741/2215. Dataloading: 0.0024 s/iter. Inference: 0.0847 s/iter. Eval: 0.0004 s/iter. Total: 0.0876 s/iter. ETA=0:00:41 [05/09 01:13:02 d2.evaluation.evaluator]: Inference done 1800/2215. Dataloading: 0.0024 s/iter. Inference: 0.0847 s/iter. Eval: 0.0004 s/iter. Total: 0.0875 s/iter. ETA=0:00:36 [05/09 01:13:07 d2.evaluation.evaluator]: Inference done 1859/2215. Dataloading: 0.0023 s/iter. Inference: 0.0846 s/iter. Eval: 0.0004 s/iter. Total: 0.0875 s/iter. ETA=0:00:31 [05/09 01:13:13 d2.evaluation.evaluator]: Inference done 1918/2215. Dataloading: 0.0023 s/iter. Inference: 0.0846 s/iter. Eval: 0.0004 s/iter. Total: 0.0874 s/iter. ETA=0:00:25 [05/09 01:13:18 d2.evaluation.evaluator]: Inference done 1975/2215. Dataloading: 0.0024 s/iter. Inference: 0.0846 s/iter. Eval: 0.0004 s/iter. Total: 0.0874 s/iter. ETA=0:00:20 [05/09 01:13:23 d2.evaluation.evaluator]: Inference done 2033/2215. Dataloading: 0.0023 s/iter. Inference: 0.0846 s/iter. Eval: 0.0004 s/iter. Total: 0.0874 s/iter. ETA=0:00:15 [05/09 01:13:28 d2.evaluation.evaluator]: Inference done 2090/2215. Dataloading: 0.0023 s/iter. Inference: 0.0846 s/iter. Eval: 0.0004 s/iter. Total: 0.0874 s/iter. ETA=0:00:10 [05/09 01:13:33 d2.evaluation.evaluator]: Inference done 2148/2215. Dataloading: 0.0023 s/iter. Inference: 0.0846 s/iter. Eval: 0.0004 s/iter. Total: 0.0874 s/iter. ETA=0:00:05 [05/09 01:13:38 d2.evaluation.evaluator]: Inference done 2206/2215. Dataloading: 0.0023 s/iter. Inference: 0.0846 s/iter. Eval: 0.0004 s/iter. Total: 0.0874 s/iter. 
ETA=0:00:00 [05/09 01:13:39 d2.evaluation.evaluator]: Total inference time: 0:03:13.212745 (0.087427 s / iter per device, on 1 devices) [05/09 01:13:39 d2.evaluation.evaluator]: Total inference pure compute time: 0:03:06 (0.084569 s / iter per device, on 1 devices) [05/09 01:13:40 d2.evaluation.coco_evaluation]: Preparing results for COCO format ... [05/09 01:13:40 d2.evaluation.coco_evaluation]: Saving results to ./output_evals/coco_instances_results.json [05/09 01:13:40 d2.evaluation.coco_evaluation]: Evaluating predictions with unofficial COCO API... Loading and preparing results... DONE (t=0.80s) creating index... index created! [05/09 01:13:41 d2.evaluation.fast_eval_api]: Evaluate annotation type *bbox* [05/09 01:13:43 d2.evaluation.fast_eval_api]: COCOeval_opt.evaluate() finished in 1.60 seconds. [05/09 01:13:43 d2.evaluation.fast_eval_api]: Accumulating evaluation results... [05/09 01:13:43 d2.evaluation.fast_eval_api]: COCOeval_opt.accumulate() finished in 0.45 seconds. Average Precision (AP) @[ IoU=0.50:0.95 | area= all | maxDets=100 ] = 0.150 Average Precision (AP) @[ IoU=0.50 | area= all | maxDets=100 ] = 0.230 Average Precision (AP) @[ IoU=0.75 | area= all | maxDets=100 ] = 0.166 Average Precision (AP) @[ IoU=0.50:0.95 | area= small | maxDets=100 ] = 0.700 Average Precision (AP) @[ IoU=0.50:0.95 | area=medium | maxDets=100 ] = 0.183 Average Precision (AP) @[ IoU=0.50:0.95 | area= large | maxDets=100 ] = 0.148 Average Recall (AR) @[ IoU=0.50:0.95 | area= all | maxDets= 1 ] = 0.241 Average Recall (AR) @[ IoU=0.50:0.95 | area= all | maxDets= 10 ] = 0.371 Average Recall (AR) @[ IoU=0.50:0.95 | area= all | maxDets=100 ] = 0.375 Average Recall (AR) @[ IoU=0.50:0.95 | area= small | maxDets=100 ] = 0.700 Average Recall (AR) @[ IoU=0.50:0.95 | area=medium | maxDets=100 ] = 0.338 Average Recall (AR) @[ IoU=0.50:0.95 | area= large | maxDets=100 ] = 0.395 [05/09 01:13:43 d2.evaluation.coco_evaluation]: Evaluation results for bbox: | AP | AP50 | AP75 | APs | APm | 
APl | |:------:|:------:|:------:|:------:|:------:|:------:| | 14.973 | 22.975 | 16.646 | 70.000 | 18.309 | 14.828 | [05/09 01:13:43 d2.evaluation.coco_evaluation]: Per-category bbox AP: | category | AP | category | AP | category | AP | |:--------------|:-------|:-----------|:------|:-----------|:-------| | hold | 37.947 | carry | 5.420 | point | 0.000 | | eat | 12.513 | drink | 0.000 | stand | 48.934 | | talk_on_phone | 0.000 | | | | |
OrderedDict([('bbox',
{'AP': 14.973408472126575,
'AP-carry': 5.420342633782121,
'AP-drink': 0.0,
'AP-eat': 12.512506011482463,
'AP-hold': 37.9474266183727,
'AP-point': 0.0,
'AP-stand': 48.933584041248736,
'AP-talk_on_phone': 0.0,
'AP50': 22.974768706425476,
'AP75': 16.64620894499706,
'APl': 14.828050762899277,
'APm': 18.308672603489068,
'APs': 70.0})])
%ls ../GroceryImages
image01.jpeg image07.jpeg image13.jpeg image40.jpeg image46.jpeg image02.jpeg image08.jpeg image14.jpeg image41.jpeg image47.jpeg image03.jpeg image09.png image36.jpeg image42.jpeg image48.jpeg image04.jpeg image10.jpeg image37.jpeg image43.jpeg image49.jpeg image05.jpeg image11.png image38.jpeg image44.jpeg image50.jpeg image06.png image12.jpeg image39.jpeg image45.jpeg
# Run the action-only predictor over a fixed set of grocery-store images,
# draw the predicted instances, display each result inline, and save the
# annotated image under ../sandbox/annotated_shopping/action_only_model/.
import os

image_list = ['image01.jpeg', 'image02.jpeg', 'image03.jpeg', 'image04.jpeg', 'image05.jpeg', 'image06.png', 'image07.jpeg', 'image08.jpeg', 'image09.png', 'image10.jpeg', 'image11.png', 'image12.jpeg', 'image13.jpeg', 'image14.jpeg', 'image36.jpeg', 'image37.jpeg', 'image38.jpeg', 'image39.jpeg', 'image40.jpeg', 'image41.jpeg', 'image42.jpeg', 'image43.jpeg', 'image44.jpeg', 'image45.jpeg', 'image46.jpeg', 'image47.jpeg', 'image48.jpeg', 'image49.jpeg', 'image50.jpeg']

input_dir = "../GroceryImages"
output_dir = "../sandbox/annotated_shopping/action_only_model"
# Ensure the save target exists so out.save() does not fail on first run.
os.makedirs(output_dir, exist_ok=True)

for image_name in image_list:
    im = cv2.imread(f"{input_dir}/{image_name}")
    if im is None:
        # cv2.imread returns None for a missing/unreadable file; skip it
        # instead of crashing inside predictor(im).
        print(f"WARNING: could not read {image_name}, skipping")
        continue
    print(image_name)
    outputs = predictor(im)
    # OpenCV loads BGR; Visualizer expects RGB — hence the channel reversal
    # here, and the reverse flip again before cv2_imshow (which wants BGR).
    v = Visualizer(im[:, :, ::-1], MetadataCatalog.get(cfg.DATASETS.TRAIN[0]), scale=1.2)
    out = v.draw_instance_predictions(outputs["instances"].to("cpu"))
    cv2_imshow(out.get_image()[:, :, ::-1])
    out.save(f"{output_dir}/{image_name}")
image01.jpeg
image02.jpeg
image03.jpeg
image04.jpeg
image05.jpeg
image06.png